[llvm] [AArch64][GlobalISel] Select *v1f16 for f16->s16 to_int_sat_gi (PR #154562)
Kajetan Puchalski via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 21 04:05:56 PDT 2025
https://github.com/mrkajetanp updated https://github.com/llvm/llvm-project/pull/154562
>From c77c3cf3c520bbbf5c49354659cd57e49d6ab6a0 Mon Sep 17 00:00:00 2001
From: Kajetan Puchalski <kajetan.puchalski at arm.com>
Date: Wed, 20 Aug 2025 15:27:23 +0000
Subject: [PATCH 1/2] [AArch64][GlobalISel] Select *v1f16 for f16->s16
to_int_sat_gi
Conversions from f16 to s16 performed by to_int_sat_gi can be done
directly within FPRs, e.g. `fcvtzs h0, h0`.
Generating this format reduces the number of instructions required for
correct behaviour, as it sidesteps the issues with incorrect saturation
that arise when using `fcvtzs w0, h0` for the same casts.
Signed-off-by: Kajetan Puchalski <kajetan.puchalski at arm.com>
---
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 29 ++++++++++++-------
.../AArch64/GISel/AArch64LegalizerInfo.cpp | 5 ++--
.../AArch64/GISel/AArch64RegisterBankInfo.cpp | 4 +++
.../test/CodeGen/AArch64/fptosi-sat-scalar.ll | 9 ++----
.../test/CodeGen/AArch64/fptoui-sat-scalar.ll | 6 ++--
5 files changed, 29 insertions(+), 24 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 89f88776d832c..4601aa2d223f3 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -5154,6 +5154,22 @@ def STLTXRW : StoreExclusiveLSUI<0b10, 0, 1, GPR32, "stltxr">;
def STLTXRX : StoreExclusiveLSUI<0b11, 0, 1, GPR64, "stltxr">;
}
+//===----------------------------------------------------------------------===//
+// Advanced SIMD two scalar instructions (floating point to integer conversion)
+//===----------------------------------------------------------------------===//
+
+defm FCVTAS : SIMDFPTwoScalar< 0, 0, 0b11100, "fcvtas">;
+defm FCVTAU : SIMDFPTwoScalar< 1, 0, 0b11100, "fcvtau">;
+defm FCVTMS : SIMDFPTwoScalar< 0, 0, 0b11011, "fcvtms">;
+defm FCVTMU : SIMDFPTwoScalar< 1, 0, 0b11011, "fcvtmu">;
+defm FCVTNS : SIMDFPTwoScalar< 0, 0, 0b11010, "fcvtns">;
+defm FCVTNU : SIMDFPTwoScalar< 1, 0, 0b11010, "fcvtnu">;
+defm FCVTPS : SIMDFPTwoScalar< 0, 1, 0b11010, "fcvtps">;
+defm FCVTPU : SIMDFPTwoScalar< 1, 1, 0b11010, "fcvtpu">;
+def FCVTXNv1i64 : SIMDInexactCvtTwoScalar<0b10110, "fcvtxn">;
+defm FCVTZS : SIMDFPTwoScalar< 0, 1, 0b11011, "fcvtzs">;
+defm FCVTZU : SIMDFPTwoScalar< 1, 1, 0b11011, "fcvtzu">;
+
//===----------------------------------------------------------------------===//
// Scaled floating point to integer conversion instructions.
//===----------------------------------------------------------------------===//
@@ -5202,6 +5218,8 @@ multiclass FPToIntegerSatPats<SDNode to_int_sat, SDNode to_int_sat_gi, string IN
(!cast<Instruction>(INST # UXDr) f64:$Rn)>;
let Predicates = [HasFullFP16] in {
+ def : Pat<(i16 (to_int_sat_gi f16:$Rn)),
+ (!cast<Instruction>(INST # v1f16) f16:$Rn)>;
def : Pat<(i32 (to_int_sat_gi f16:$Rn)),
(!cast<Instruction>(INST # UWHr) f16:$Rn)>;
def : Pat<(i64 (to_int_sat_gi f16:$Rn)),
@@ -6508,17 +6526,6 @@ defm FCMGE : SIMDFPCmpTwoScalar<1, 1, 0b01100, "fcmge", AArch64fcmgez>;
defm FCMGT : SIMDFPCmpTwoScalar<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>;
defm FCMLE : SIMDFPCmpTwoScalar<1, 1, 0b01101, "fcmle", AArch64fcmlez>;
defm FCMLT : SIMDFPCmpTwoScalar<0, 1, 0b01110, "fcmlt", AArch64fcmltz>;
-defm FCVTAS : SIMDFPTwoScalar< 0, 0, 0b11100, "fcvtas">;
-defm FCVTAU : SIMDFPTwoScalar< 1, 0, 0b11100, "fcvtau">;
-defm FCVTMS : SIMDFPTwoScalar< 0, 0, 0b11011, "fcvtms">;
-defm FCVTMU : SIMDFPTwoScalar< 1, 0, 0b11011, "fcvtmu">;
-defm FCVTNS : SIMDFPTwoScalar< 0, 0, 0b11010, "fcvtns">;
-defm FCVTNU : SIMDFPTwoScalar< 1, 0, 0b11010, "fcvtnu">;
-defm FCVTPS : SIMDFPTwoScalar< 0, 1, 0b11010, "fcvtps">;
-defm FCVTPU : SIMDFPTwoScalar< 1, 1, 0b11010, "fcvtpu">;
-def FCVTXNv1i64 : SIMDInexactCvtTwoScalar<0b10110, "fcvtxn">;
-defm FCVTZS : SIMDFPTwoScalar< 0, 1, 0b11011, "fcvtzs">;
-defm FCVTZU : SIMDFPTwoScalar< 1, 1, 0b11011, "fcvtzu">;
defm FRECPE : SIMDFPTwoScalar< 0, 1, 0b11101, "frecpe">;
defm FRECPX : SIMDFPTwoScalar< 0, 1, 0b11111, "frecpx">;
defm FRSQRTE : SIMDFPTwoScalar< 1, 1, 0b11101, "frsqrte">;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index efc62ea8ff6c2..b0374613a217a 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -879,8 +879,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
{v2s32, v2s32},
{v4s32, v4s32},
{v2s64, v2s64}})
- .legalFor(HasFP16,
- {{s32, s16}, {s64, s16}, {v4s16, v4s16}, {v8s16, v8s16}})
+ .legalFor(
+ HasFP16,
+ {{s16, s16}, {s32, s16}, {s64, s16}, {v4s16, v4s16}, {v8s16, v8s16}})
// Handle types larger than i64 by scalarizing/lowering.
.scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
.scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index 31954e7954c03..02cc5e3b7ade1 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -840,6 +840,10 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case TargetOpcode::G_INTRINSIC_LLRINT:
if (MRI.getType(MI.getOperand(0).getReg()).isVector())
break;
+ if (MRI.getType(MI.getOperand(0).getReg()) == LLT::scalar(16)) {
+ OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
+ break;
+ }
OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
break;
case TargetOpcode::G_FCMP: {
diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
index e3aef487890f9..e40cac866740c 100644
--- a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
+++ b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
@@ -693,13 +693,8 @@ define i16 @test_signed_i16_f16(half %f) nounwind {
;
; CHECK-GI-FP16-LABEL: test_signed_i16_f16:
; CHECK-GI-FP16: // %bb.0:
-; CHECK-GI-FP16-NEXT: fcvtzs w8, h0
-; CHECK-GI-FP16-NEXT: mov w9, #32767 // =0x7fff
-; CHECK-GI-FP16-NEXT: cmp w8, w9
-; CHECK-GI-FP16-NEXT: csel w8, w8, w9, lt
-; CHECK-GI-FP16-NEXT: mov w9, #-32768 // =0xffff8000
-; CHECK-GI-FP16-NEXT: cmn w8, #8, lsl #12 // =32768
-; CHECK-GI-FP16-NEXT: csel w0, w8, w9, gt
+; CHECK-GI-FP16-NEXT: fcvtzs h0, h0
+; CHECK-GI-FP16-NEXT: fmov w0, s0
; CHECK-GI-FP16-NEXT: ret
%x = call i16 @llvm.fptosi.sat.i16.f16(half %f)
ret i16 %x
diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll
index 07e49e331415e..94e20b7ed8480 100644
--- a/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll
+++ b/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll
@@ -548,10 +548,8 @@ define i16 @test_unsigned_i16_f16(half %f) nounwind {
;
; CHECK-GI-FP16-LABEL: test_unsigned_i16_f16:
; CHECK-GI-FP16: // %bb.0:
-; CHECK-GI-FP16-NEXT: fcvtzu w8, h0
-; CHECK-GI-FP16-NEXT: mov w9, #65535 // =0xffff
-; CHECK-GI-FP16-NEXT: cmp w8, w9
-; CHECK-GI-FP16-NEXT: csel w0, w8, w9, lo
+; CHECK-GI-FP16-NEXT: fcvtzu h0, h0
+; CHECK-GI-FP16-NEXT: fmov w0, s0
; CHECK-GI-FP16-NEXT: ret
%x = call i16 @llvm.fptoui.sat.i16.f16(half %f)
ret i16 %x
>From 217d6ae08f410c1cdbfd8d022abc317a4322f9f4 Mon Sep 17 00:00:00 2001
From: Kajetan Puchalski <kajetan.puchalski at arm.com>
Date: Thu, 21 Aug 2025 11:05:40 +0000
Subject: [PATCH 2/2] David's review
---
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 44 +++++++++----------
.../AArch64/GISel/AArch64RegisterBankInfo.cpp | 18 +++++---
2 files changed, 34 insertions(+), 28 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 4601aa2d223f3..cac8aa7f88584 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -5154,22 +5154,6 @@ def STLTXRW : StoreExclusiveLSUI<0b10, 0, 1, GPR32, "stltxr">;
def STLTXRX : StoreExclusiveLSUI<0b11, 0, 1, GPR64, "stltxr">;
}
-//===----------------------------------------------------------------------===//
-// Advanced SIMD two scalar instructions (floating point to integer conversion)
-//===----------------------------------------------------------------------===//
-
-defm FCVTAS : SIMDFPTwoScalar< 0, 0, 0b11100, "fcvtas">;
-defm FCVTAU : SIMDFPTwoScalar< 1, 0, 0b11100, "fcvtau">;
-defm FCVTMS : SIMDFPTwoScalar< 0, 0, 0b11011, "fcvtms">;
-defm FCVTMU : SIMDFPTwoScalar< 1, 0, 0b11011, "fcvtmu">;
-defm FCVTNS : SIMDFPTwoScalar< 0, 0, 0b11010, "fcvtns">;
-defm FCVTNU : SIMDFPTwoScalar< 1, 0, 0b11010, "fcvtnu">;
-defm FCVTPS : SIMDFPTwoScalar< 0, 1, 0b11010, "fcvtps">;
-defm FCVTPU : SIMDFPTwoScalar< 1, 1, 0b11010, "fcvtpu">;
-def FCVTXNv1i64 : SIMDInexactCvtTwoScalar<0b10110, "fcvtxn">;
-defm FCVTZS : SIMDFPTwoScalar< 0, 1, 0b11011, "fcvtzs">;
-defm FCVTZU : SIMDFPTwoScalar< 1, 1, 0b11011, "fcvtzu">;
-
//===----------------------------------------------------------------------===//
// Scaled floating point to integer conversion instructions.
//===----------------------------------------------------------------------===//
@@ -5218,12 +5202,10 @@ multiclass FPToIntegerSatPats<SDNode to_int_sat, SDNode to_int_sat_gi, string IN
(!cast<Instruction>(INST # UXDr) f64:$Rn)>;
let Predicates = [HasFullFP16] in {
- def : Pat<(i16 (to_int_sat_gi f16:$Rn)),
- (!cast<Instruction>(INST # v1f16) f16:$Rn)>;
- def : Pat<(i32 (to_int_sat_gi f16:$Rn)),
- (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
- def : Pat<(i64 (to_int_sat_gi f16:$Rn)),
- (!cast<Instruction>(INST # UXHr) f16:$Rn)>;
+ def : Pat<(i32(to_int_sat_gi f16:$Rn)),
+ (!cast<Instruction>(INST#UWHr) f16:$Rn)>;
+ def : Pat<(i64(to_int_sat_gi f16:$Rn)),
+ (!cast<Instruction>(INST#UXHr) f16:$Rn)>;
}
def : Pat<(i32 (to_int_sat_gi f32:$Rn)),
(!cast<Instruction>(INST # UWSr) f32:$Rn)>;
@@ -6526,6 +6508,18 @@ defm FCMGE : SIMDFPCmpTwoScalar<1, 1, 0b01100, "fcmge", AArch64fcmgez>;
defm FCMGT : SIMDFPCmpTwoScalar<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>;
defm FCMLE : SIMDFPCmpTwoScalar<1, 1, 0b01101, "fcmle", AArch64fcmlez>;
defm FCMLT : SIMDFPCmpTwoScalar<0, 1, 0b01110, "fcmlt", AArch64fcmltz>;
+defm FCVTAS : SIMDFPTwoScalar<0, 0, 0b11100, "fcvtas">;
+defm FCVTAU : SIMDFPTwoScalar<1, 0, 0b11100, "fcvtau">;
+defm FCVTMS : SIMDFPTwoScalar<0, 0, 0b11011, "fcvtms">;
+defm FCVTMU : SIMDFPTwoScalar<1, 0, 0b11011, "fcvtmu">;
+defm FCVTNS : SIMDFPTwoScalar<0, 0, 0b11010, "fcvtns">;
+defm FCVTNU : SIMDFPTwoScalar<1, 0, 0b11010, "fcvtnu">;
+defm FCVTPS : SIMDFPTwoScalar<0, 1, 0b11010, "fcvtps">;
+defm FCVTPU : SIMDFPTwoScalar<1, 1, 0b11010, "fcvtpu">;
+def FCVTXNv1i64 : SIMDInexactCvtTwoScalar<0b10110, "fcvtxn">;
+defm FCVTZS : SIMDFPTwoScalar<0, 1, 0b11011, "fcvtzs">;
+defm FCVTZU : SIMDFPTwoScalar<1, 1, 0b11011, "fcvtzu">;
+
defm FRECPE : SIMDFPTwoScalar< 0, 1, 0b11101, "frecpe">;
defm FRECPX : SIMDFPTwoScalar< 0, 1, 0b11111, "frecpx">;
defm FRSQRTE : SIMDFPTwoScalar< 1, 1, 0b11101, "frsqrte">;
@@ -6543,6 +6537,12 @@ defm UQXTN : SIMDTwoScalarMixedBHS<1, 0b10100, "uqxtn", int_aarch64_neon_scalar
defm USQADD : SIMDTwoScalarBHSDTied< 1, 0b00011, "usqadd",
int_aarch64_neon_usqadd>;
+// f16 -> s16 conversions
+let Predicates = [HasFullFP16] in {
+ def : Pat<(i16(fp_to_sint_sat_gi f16:$Rn)), (FCVTZSv1f16 f16:$Rn)>;
+ def : Pat<(i16(fp_to_uint_sat_gi f16:$Rn)), (FCVTZUv1f16 f16:$Rn)>;
+}
+
def : Pat<(v1i64 (AArch64vashr (v1i64 V64:$Rn), (i32 63))),
(CMLTv1i64rz V64:$Rn)>;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index 02cc5e3b7ade1..c496d47dfb4d4 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -832,18 +832,24 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
break;
}
+ case TargetOpcode::G_FPTOSI_SAT:
+ case TargetOpcode::G_FPTOUI_SAT: {
+ LLT DstType = MRI.getType(MI.getOperand(0).getReg());
+ if (DstType.isVector())
+ break;
+ if (DstType == LLT::scalar(16)) {
+ OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
+ break;
+ }
+ OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
+ break;
+ }
case TargetOpcode::G_FPTOSI:
case TargetOpcode::G_FPTOUI:
- case TargetOpcode::G_FPTOSI_SAT:
- case TargetOpcode::G_FPTOUI_SAT:
case TargetOpcode::G_INTRINSIC_LRINT:
case TargetOpcode::G_INTRINSIC_LLRINT:
if (MRI.getType(MI.getOperand(0).getReg()).isVector())
break;
- if (MRI.getType(MI.getOperand(0).getReg()) == LLT::scalar(16)) {
- OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
- break;
- }
OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
break;
case TargetOpcode::G_FCMP: {
More information about the llvm-commits
mailing list