[llvm] [AArch64] Add LRINT/LLRINT/LROUND/LLROUND FP16 lowering without fullfp16 (PR #66174)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 12 23:59:06 PDT 2023
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
<details>
<summary>Changes</summary>
We apparently had custom lowering for the STRICT nodes without any handling for the normal (non-strict) operations. This makes sure we support the LRINT, LLRINT, LROUND and LLROUND intrinsics for fp16 when +fullfp16 is not present.
--
Full diff: https://github.com/llvm/llvm-project/pull/66174.diff
5 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+12-1)
- (modified) llvm/test/CodeGen/AArch64/llrint-conv-fp16.ll (+43-13)
- (modified) llvm/test/CodeGen/AArch64/llround-conv-fp16.ll (+37-10)
- (modified) llvm/test/CodeGen/AArch64/lrint-conv-fp16.ll (+43-13)
- (modified) llvm/test/CodeGen/AArch64/lround-conv-fp16.ll (+37-10)
<pre>
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index abe100335c23172..1adf3283ac6286f 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -707,7 +707,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
// Round-to-integer need custom lowering for fp16, as Promote doesn't work
// because the result type is integer.
- for (auto Op : {ISD::STRICT_LROUND, ISD::STRICT_LLROUND, ISD::STRICT_LRINT,
+ for (auto Op : {ISD::LROUND, ISD::LLROUND, ISD::LRINT, ISD::LLRINT,
+ ISD::STRICT_LROUND, ISD::STRICT_LLROUND, ISD::STRICT_LRINT,
ISD::STRICT_LLRINT})
setOperationAction(Op, MVT::f16, Custom);
@@ -6183,6 +6184,16 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return LowerVECTOR_DEINTERLEAVE(Op, DAG);
case ISD::VECTOR_INTERLEAVE:
return LowerVECTOR_INTERLEAVE(Op, DAG);
+ case ISD::LROUND:
+ case ISD::LLROUND:
+ case ISD::LRINT:
+ case ISD::LLRINT: {
+ assert(Op.getOperand(0).getValueType() == MVT::f16 &&
+ "Expected custom lowering of rounding operations only for f16");
+ SDLoc DL(Op);
+ SDValue Ext = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
+ return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), Ext);
+ }
case ISD::STRICT_LROUND:
case ISD::STRICT_LLROUND:
case ISD::STRICT_LRINT:
diff --git a/llvm/test/CodeGen/AArch64/llrint-conv-fp16.ll b/llvm/test/CodeGen/AArch64/llrint-conv-fp16.ll
index 366c337b59f6e32..1adbbab76abf52c 100644
--- a/llvm/test/CodeGen/AArch64/llrint-conv-fp16.ll
+++ b/llvm/test/CodeGen/AArch64/llrint-conv-fp16.ll
@@ -1,32 +1,62 @@
-; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK-NOFP16
+; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK-FP16
-; CHECK-LABEL: testmhhs:
-; CHECK: frintx h0, h0
-; CHECK-NEXT: fcvtzs x0, h0
-; CHECK: ret
define i16 @testmhhs(half %x) {
+; CHECK-NOFP16-LABEL: testmhhs:
+; CHECK-NOFP16: // %bb.0: // %entry
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: frintx s0, s0
+; CHECK-NOFP16-NEXT: fcvtzs x0, s0
+; CHECK-NOFP16-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: testmhhs:
+; CHECK-FP16: // %bb.0: // %entry
+; CHECK-FP16-NEXT: frintx h0, h0
+; CHECK-FP16-NEXT: fcvtzs x0, h0
+; CHECK-FP16-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-FP16-NEXT: ret
entry:
%0 = tail call i64 @llvm.llrint.i64.f16(half %x)
%conv = trunc i64 %0 to i16
ret i16 %conv
}
-; CHECK-LABEL: testmhws:
-; CHECK: frintx h0, h0
-; CHECK-NEXT: fcvtzs x0, h0
-; CHECK: ret
define i32 @testmhws(half %x) {
+; CHECK-NOFP16-LABEL: testmhws:
+; CHECK-NOFP16: // %bb.0: // %entry
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: frintx s0, s0
+; CHECK-NOFP16-NEXT: fcvtzs x0, s0
+; CHECK-NOFP16-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: testmhws:
+; CHECK-FP16: // %bb.0: // %entry
+; CHECK-FP16-NEXT: frintx h0, h0
+; CHECK-FP16-NEXT: fcvtzs x0, h0
+; CHECK-FP16-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-FP16-NEXT: ret
entry:
%0 = tail call i64 @llvm.llrint.i64.f16(half %x)
%conv = trunc i64 %0 to i32
ret i32 %conv
}
-; CHECK-LABEL: testmhxs:
-; CHECK: frintx h0, h0
-; CHECK-NEXT: fcvtzs x0, h0
-; CHECK: ret
define i64 @testmhxs(half %x) {
+; CHECK-NOFP16-LABEL: testmhxs:
+; CHECK-NOFP16: // %bb.0: // %entry
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: frintx s0, s0
+; CHECK-NOFP16-NEXT: fcvtzs x0, s0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: testmhxs:
+; CHECK-FP16: // %bb.0: // %entry
+; CHECK-FP16-NEXT: frintx h0, h0
+; CHECK-FP16-NEXT: fcvtzs x0, h0
+; CHECK-FP16-NEXT: ret
entry:
%0 = tail call i64 @llvm.llrint.i64.f16(half %x)
ret i64 %0
diff --git a/llvm/test/CodeGen/AArch64/llround-conv-fp16.ll b/llvm/test/CodeGen/AArch64/llround-conv-fp16.ll
index 5c914c093610198..4bf65e7d6fd088c 100644
--- a/llvm/test/CodeGen/AArch64/llround-conv-fp16.ll
+++ b/llvm/test/CodeGen/AArch64/llround-conv-fp16.ll
@@ -1,29 +1,56 @@
-; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK-NOFP16
+; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK-FP16
-; CHECK-LABEL: testmhhs:
-; CHECK: fcvtas x0, h0
-; CHECK: ret
define i16 @testmhhs(half %x) {
+; CHECK-NOFP16-LABEL: testmhhs:
+; CHECK-NOFP16: // %bb.0: // %entry
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvtas x0, s0
+; CHECK-NOFP16-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: testmhhs:
+; CHECK-FP16: // %bb.0: // %entry
+; CHECK-FP16-NEXT: fcvtas x0, h0
+; CHECK-FP16-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-FP16-NEXT: ret
entry:
%0 = tail call i64 @llvm.llround.i64.f16(half %x)
%conv = trunc i64 %0 to i16
ret i16 %conv
}
-; CHECK-LABEL: testmhws:
-; CHECK: fcvtas x0, h0
-; CHECK: ret
define i32 @testmhws(half %x) {
+; CHECK-NOFP16-LABEL: testmhws:
+; CHECK-NOFP16: // %bb.0: // %entry
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvtas x0, s0
+; CHECK-NOFP16-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: testmhws:
+; CHECK-FP16: // %bb.0: // %entry
+; CHECK-FP16-NEXT: fcvtas x0, h0
+; CHECK-FP16-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-FP16-NEXT: ret
entry:
%0 = tail call i64 @llvm.llround.i64.f16(half %x)
%conv = trunc i64 %0 to i32
ret i32 %conv
}
-; CHECK-LABEL: testmhxs:
-; CHECK: fcvtas x0, h0
-; CHECK-NEXT: ret
define i64 @testmhxs(half %x) {
+; CHECK-NOFP16-LABEL: testmhxs:
+; CHECK-NOFP16: // %bb.0: // %entry
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvtas x0, s0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: testmhxs:
+; CHECK-FP16: // %bb.0: // %entry
+; CHECK-FP16-NEXT: fcvtas x0, h0
+; CHECK-FP16-NEXT: ret
entry:
%0 = tail call i64 @llvm.llround.i64.f16(half %x)
ret i64 %0
diff --git a/llvm/test/CodeGen/AArch64/lrint-conv-fp16.ll b/llvm/test/CodeGen/AArch64/lrint-conv-fp16.ll
index d812e2f585bce4f..7557ceac1212f78 100644
--- a/llvm/test/CodeGen/AArch64/lrint-conv-fp16.ll
+++ b/llvm/test/CodeGen/AArch64/lrint-conv-fp16.ll
@@ -1,32 +1,62 @@
-; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK-NOFP16
+; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK-FP16
-; CHECK-LABEL: testmhhs:
-; CHECK: frintx h0, h0
-; CHECK-NEXT: fcvtzs x0, h0
-; CHECK: ret
define i16 @testmhhs(half %x) {
+; CHECK-NOFP16-LABEL: testmhhs:
+; CHECK-NOFP16: // %bb.0: // %entry
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: frintx s0, s0
+; CHECK-NOFP16-NEXT: fcvtzs x0, s0
+; CHECK-NOFP16-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: testmhhs:
+; CHECK-FP16: // %bb.0: // %entry
+; CHECK-FP16-NEXT: frintx h0, h0
+; CHECK-FP16-NEXT: fcvtzs x0, h0
+; CHECK-FP16-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-FP16-NEXT: ret
entry:
%0 = tail call i64 @llvm.lrint.i64.f16(half %x)
%conv = trunc i64 %0 to i16
ret i16 %conv
}
-; CHECK-LABEL: testmhws:
-; CHECK: frintx h0, h0
-; CHECK-NEXT: fcvtzs x0, h0
-; CHECK: ret
define i32 @testmhws(half %x) {
+; CHECK-NOFP16-LABEL: testmhws:
+; CHECK-NOFP16: // %bb.0: // %entry
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: frintx s0, s0
+; CHECK-NOFP16-NEXT: fcvtzs x0, s0
+; CHECK-NOFP16-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: testmhws:
+; CHECK-FP16: // %bb.0: // %entry
+; CHECK-FP16-NEXT: frintx h0, h0
+; CHECK-FP16-NEXT: fcvtzs x0, h0
+; CHECK-FP16-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-FP16-NEXT: ret
entry:
%0 = tail call i64 @llvm.lrint.i64.f16(half %x)
%conv = trunc i64 %0 to i32
ret i32 %conv
}
-; CHECK-LABEL: testmhxs:
-; CHECK: frintx h0, h0
-; CHECK-NEXT: fcvtzs x0, h0
-; CHECK: ret
define i64 @testmhxs(half %x) {
+; CHECK-NOFP16-LABEL: testmhxs:
+; CHECK-NOFP16: // %bb.0: // %entry
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: frintx s0, s0
+; CHECK-NOFP16-NEXT: fcvtzs x0, s0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: testmhxs:
+; CHECK-FP16: // %bb.0: // %entry
+; CHECK-FP16-NEXT: frintx h0, h0
+; CHECK-FP16-NEXT: fcvtzs x0, h0
+; CHECK-FP16-NEXT: ret
entry:
%0 = tail call i64 @llvm.lrint.i64.f16(half %x)
ret i64 %0
diff --git a/llvm/test/CodeGen/AArch64/lround-conv-fp16.ll b/llvm/test/CodeGen/AArch64/lround-conv-fp16.ll
index cf81047f65ec923..bf78fd456eac0ed 100644
--- a/llvm/test/CodeGen/AArch64/lround-conv-fp16.ll
+++ b/llvm/test/CodeGen/AArch64/lround-conv-fp16.ll
@@ -1,29 +1,56 @@
-; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK-NOFP16
+; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK-FP16
-; CHECK-LABEL: testmhhs:
-; CHECK: fcvtas x0, h0
-; CHECK: ret
define i16 @testmhhs(half %x) {
+; CHECK-NOFP16-LABEL: testmhhs:
+; CHECK-NOFP16: // %bb.0: // %entry
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvtas x0, s0
+; CHECK-NOFP16-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: testmhhs:
+; CHECK-FP16: // %bb.0: // %entry
+; CHECK-FP16-NEXT: fcvtas x0, h0
+; CHECK-FP16-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-FP16-NEXT: ret
entry:
%0 = tail call i64 @llvm.lround.i64.f16(half %x)
%conv = trunc i64 %0 to i16
ret i16 %conv
}
-; CHECK-LABEL: testmhws:
-; CHECK: fcvtas x0, h0
-; CHECK: ret
define i32 @testmhws(half %x) {
+; CHECK-NOFP16-LABEL: testmhws:
+; CHECK-NOFP16: // %bb.0: // %entry
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvtas x0, s0
+; CHECK-NOFP16-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: testmhws:
+; CHECK-FP16: // %bb.0: // %entry
+; CHECK-FP16-NEXT: fcvtas x0, h0
+; CHECK-FP16-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-FP16-NEXT: ret
entry:
%0 = tail call i64 @llvm.lround.i64.f16(half %x)
%conv = trunc i64 %0 to i32
ret i32 %conv
}
-; CHECK-LABEL: testmhxs:
-; CHECK: fcvtas x0, h0
-; CHECK-NEXT: ret
define i64 @testmhxs(half %x) {
+; CHECK-NOFP16-LABEL: testmhxs:
+; CHECK-NOFP16: // %bb.0: // %entry
+; CHECK-NOFP16-NEXT: fcvt s0, h0
+; CHECK-NOFP16-NEXT: fcvtas x0, s0
+; CHECK-NOFP16-NEXT: ret
+;
+; CHECK-FP16-LABEL: testmhxs:
+; CHECK-FP16: // %bb.0: // %entry
+; CHECK-FP16-NEXT: fcvtas x0, h0
+; CHECK-FP16-NEXT: ret
entry:
%0 = tail call i64 @llvm.lround.i64.f16(half %x)
ret i64 %0
</pre>
</details>
https://github.com/llvm/llvm-project/pull/66174
More information about the llvm-commits mailing list