[llvm] [AArch64] Add LRINT/LLRINT/LROUND/LLROUND FP16 lowering without fullfp16 (PR #66174)

Tue Sep 12 23:59:06 PDT 2023

llvmbot wrote:




@llvm/pr-subscribers-backend-aarch64
            
<details>
<summary>Changes</summary>
We already had custom lowering for the STRICT nodes but no handling for the corresponding non-strict operations. This adds support for the LRINT, LLRINT, LROUND and LLROUND intrinsics on fp16 when +fullfp16 is not present, by extending the operand to f32 before the operation.
--
Full diff: https://github.com/llvm/llvm-project/pull/66174.diff

5 Files Affected:

- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+12-1) 
- (modified) llvm/test/CodeGen/AArch64/llrint-conv-fp16.ll (+43-13) 
- (modified) llvm/test/CodeGen/AArch64/llround-conv-fp16.ll (+37-10) 
- (modified) llvm/test/CodeGen/AArch64/lrint-conv-fp16.ll (+43-13) 
- (modified) llvm/test/CodeGen/AArch64/lround-conv-fp16.ll (+37-10) 


<pre>

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index abe100335c23172..1adf3283ac6286f 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -707,7 +707,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
 
     // Round-to-integer need custom lowering for fp16, as Promote doesn't work
     // because the result type is integer.
-    for (auto Op : {ISD::STRICT_LROUND, ISD::STRICT_LLROUND, ISD::STRICT_LRINT,
+    for (auto Op : {ISD::LROUND, ISD::LLROUND, ISD::LRINT, ISD::LLRINT,
+                    ISD::STRICT_LROUND, ISD::STRICT_LLROUND, ISD::STRICT_LRINT,
                     ISD::STRICT_LLRINT})
       setOperationAction(Op, MVT::f16, Custom);
 
@@ -6183,6 +6184,16 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
     return LowerVECTOR_DEINTERLEAVE(Op, DAG);
   case ISD::VECTOR_INTERLEAVE:
     return LowerVECTOR_INTERLEAVE(Op, DAG);
+  case ISD::LROUND:
+  case ISD::LLROUND:
+  case ISD::LRINT:
+  case ISD::LLRINT: {
+    assert(Op.getOperand(0).getValueType() == MVT::f16 &&
+           "Expected custom lowering of rounding operations only for f16");
+    SDLoc DL(Op);
+    SDValue Ext = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
+    return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), Ext);
+  }
   case ISD::STRICT_LROUND:
   case ISD::STRICT_LLROUND:
   case ISD::STRICT_LRINT:
diff --git a/llvm/test/CodeGen/AArch64/llrint-conv-fp16.ll b/llvm/test/CodeGen/AArch64/llrint-conv-fp16.ll
index 366c337b59f6e32..1adbbab76abf52c 100644
--- a/llvm/test/CodeGen/AArch64/llrint-conv-fp16.ll
+++ b/llvm/test/CodeGen/AArch64/llrint-conv-fp16.ll
@@ -1,32 +1,62 @@
-; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK-NOFP16
+; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK-FP16
 
-; CHECK-LABEL: testmhhs:
-; CHECK:       frintx  h0, h0
-; CHECK-NEXT:  fcvtzs  x0, h0
-; CHECK:       ret
 define i16 @testmhhs(half %x) {
+; CHECK-NOFP16-LABEL: testmhhs:
+; CHECK-NOFP16:       // %bb.0: // %entry
+; CHECK-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-NOFP16-NEXT:    frintx s0, s0
+; CHECK-NOFP16-NEXT:    fcvtzs x0, s0
+; CHECK-NOFP16-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NOFP16-NEXT:    ret
+;
+; CHECK-FP16-LABEL: testmhhs:
+; CHECK-FP16:       // %bb.0: // %entry
+; CHECK-FP16-NEXT:    frintx h0, h0
+; CHECK-FP16-NEXT:    fcvtzs x0, h0
+; CHECK-FP16-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-FP16-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.llrint.i64.f16(half %x)
   %conv = trunc i64 %0 to i16
   ret i16 %conv
 }
 
-; CHECK-LABEL: testmhws:
-; CHECK:       frintx  h0, h0
-; CHECK-NEXT:  fcvtzs  x0, h0
-; CHECK:       ret
 define i32 @testmhws(half %x) {
+; CHECK-NOFP16-LABEL: testmhws:
+; CHECK-NOFP16:       // %bb.0: // %entry
+; CHECK-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-NOFP16-NEXT:    frintx s0, s0
+; CHECK-NOFP16-NEXT:    fcvtzs x0, s0
+; CHECK-NOFP16-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NOFP16-NEXT:    ret
+;
+; CHECK-FP16-LABEL: testmhws:
+; CHECK-FP16:       // %bb.0: // %entry
+; CHECK-FP16-NEXT:    frintx h0, h0
+; CHECK-FP16-NEXT:    fcvtzs x0, h0
+; CHECK-FP16-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-FP16-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.llrint.i64.f16(half %x)
   %conv = trunc i64 %0 to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: testmhxs:
-; CHECK:       frintx  h0, h0
-; CHECK-NEXT:  fcvtzs  x0, h0
-; CHECK:       ret
 define i64 @testmhxs(half %x) {
+; CHECK-NOFP16-LABEL: testmhxs:
+; CHECK-NOFP16:       // %bb.0: // %entry
+; CHECK-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-NOFP16-NEXT:    frintx s0, s0
+; CHECK-NOFP16-NEXT:    fcvtzs x0, s0
+; CHECK-NOFP16-NEXT:    ret
+;
+; CHECK-FP16-LABEL: testmhxs:
+; CHECK-FP16:       // %bb.0: // %entry
+; CHECK-FP16-NEXT:    frintx h0, h0
+; CHECK-FP16-NEXT:    fcvtzs x0, h0
+; CHECK-FP16-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.llrint.i64.f16(half %x)
   ret i64 %0
diff --git a/llvm/test/CodeGen/AArch64/llround-conv-fp16.ll b/llvm/test/CodeGen/AArch64/llround-conv-fp16.ll
index 5c914c093610198..4bf65e7d6fd088c 100644
--- a/llvm/test/CodeGen/AArch64/llround-conv-fp16.ll
+++ b/llvm/test/CodeGen/AArch64/llround-conv-fp16.ll
@@ -1,29 +1,56 @@
-; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK-NOFP16
+; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK-FP16
 
-; CHECK-LABEL: testmhhs:
-; CHECK:       fcvtas  x0, h0
-; CHECK:       ret
 define i16 @testmhhs(half %x) {
+; CHECK-NOFP16-LABEL: testmhhs:
+; CHECK-NOFP16:       // %bb.0: // %entry
+; CHECK-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-NOFP16-NEXT:    fcvtas x0, s0
+; CHECK-NOFP16-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NOFP16-NEXT:    ret
+;
+; CHECK-FP16-LABEL: testmhhs:
+; CHECK-FP16:       // %bb.0: // %entry
+; CHECK-FP16-NEXT:    fcvtas x0, h0
+; CHECK-FP16-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-FP16-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.llround.i64.f16(half %x)
   %conv = trunc i64 %0 to i16
   ret i16 %conv
 }
 
-; CHECK-LABEL: testmhws:
-; CHECK:       fcvtas  x0, h0
-; CHECK:       ret
 define i32 @testmhws(half %x) {
+; CHECK-NOFP16-LABEL: testmhws:
+; CHECK-NOFP16:       // %bb.0: // %entry
+; CHECK-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-NOFP16-NEXT:    fcvtas x0, s0
+; CHECK-NOFP16-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NOFP16-NEXT:    ret
+;
+; CHECK-FP16-LABEL: testmhws:
+; CHECK-FP16:       // %bb.0: // %entry
+; CHECK-FP16-NEXT:    fcvtas x0, h0
+; CHECK-FP16-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-FP16-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.llround.i64.f16(half %x)
   %conv = trunc i64 %0 to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: testmhxs:
-; CHECK:       fcvtas  x0, h0
-; CHECK-NEXT:  ret
 define i64 @testmhxs(half %x) {
+; CHECK-NOFP16-LABEL: testmhxs:
+; CHECK-NOFP16:       // %bb.0: // %entry
+; CHECK-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-NOFP16-NEXT:    fcvtas x0, s0
+; CHECK-NOFP16-NEXT:    ret
+;
+; CHECK-FP16-LABEL: testmhxs:
+; CHECK-FP16:       // %bb.0: // %entry
+; CHECK-FP16-NEXT:    fcvtas x0, h0
+; CHECK-FP16-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.llround.i64.f16(half %x)
   ret i64 %0
diff --git a/llvm/test/CodeGen/AArch64/lrint-conv-fp16.ll b/llvm/test/CodeGen/AArch64/lrint-conv-fp16.ll
index d812e2f585bce4f..7557ceac1212f78 100644
--- a/llvm/test/CodeGen/AArch64/lrint-conv-fp16.ll
+++ b/llvm/test/CodeGen/AArch64/lrint-conv-fp16.ll
@@ -1,32 +1,62 @@
-; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK-NOFP16
+; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK-FP16
 
-; CHECK-LABEL: testmhhs:
-; CHECK:       frintx  h0, h0
-; CHECK-NEXT:  fcvtzs  x0, h0
-; CHECK:       ret
 define i16 @testmhhs(half %x) {
+; CHECK-NOFP16-LABEL: testmhhs:
+; CHECK-NOFP16:       // %bb.0: // %entry
+; CHECK-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-NOFP16-NEXT:    frintx s0, s0
+; CHECK-NOFP16-NEXT:    fcvtzs x0, s0
+; CHECK-NOFP16-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NOFP16-NEXT:    ret
+;
+; CHECK-FP16-LABEL: testmhhs:
+; CHECK-FP16:       // %bb.0: // %entry
+; CHECK-FP16-NEXT:    frintx h0, h0
+; CHECK-FP16-NEXT:    fcvtzs x0, h0
+; CHECK-FP16-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-FP16-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.lrint.i64.f16(half %x)
   %conv = trunc i64 %0 to i16
   ret i16 %conv
 }
 
-; CHECK-LABEL: testmhws:
-; CHECK:       frintx  h0, h0
-; CHECK-NEXT:  fcvtzs  x0, h0
-; CHECK:       ret
 define i32 @testmhws(half %x) {
+; CHECK-NOFP16-LABEL: testmhws:
+; CHECK-NOFP16:       // %bb.0: // %entry
+; CHECK-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-NOFP16-NEXT:    frintx s0, s0
+; CHECK-NOFP16-NEXT:    fcvtzs x0, s0
+; CHECK-NOFP16-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NOFP16-NEXT:    ret
+;
+; CHECK-FP16-LABEL: testmhws:
+; CHECK-FP16:       // %bb.0: // %entry
+; CHECK-FP16-NEXT:    frintx h0, h0
+; CHECK-FP16-NEXT:    fcvtzs x0, h0
+; CHECK-FP16-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-FP16-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.lrint.i64.f16(half %x)
   %conv = trunc i64 %0 to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: testmhxs:
-; CHECK:       frintx  h0, h0
-; CHECK-NEXT:  fcvtzs  x0, h0
-; CHECK:       ret
 define i64 @testmhxs(half %x) {
+; CHECK-NOFP16-LABEL: testmhxs:
+; CHECK-NOFP16:       // %bb.0: // %entry
+; CHECK-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-NOFP16-NEXT:    frintx s0, s0
+; CHECK-NOFP16-NEXT:    fcvtzs x0, s0
+; CHECK-NOFP16-NEXT:    ret
+;
+; CHECK-FP16-LABEL: testmhxs:
+; CHECK-FP16:       // %bb.0: // %entry
+; CHECK-FP16-NEXT:    frintx h0, h0
+; CHECK-FP16-NEXT:    fcvtzs x0, h0
+; CHECK-FP16-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.lrint.i64.f16(half %x)
   ret i64 %0
diff --git a/llvm/test/CodeGen/AArch64/lround-conv-fp16.ll b/llvm/test/CodeGen/AArch64/lround-conv-fp16.ll
index cf81047f65ec923..bf78fd456eac0ed 100644
--- a/llvm/test/CodeGen/AArch64/lround-conv-fp16.ll
+++ b/llvm/test/CodeGen/AArch64/lround-conv-fp16.ll
@@ -1,29 +1,56 @@
-; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK-NOFP16
+; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK-FP16
 
-; CHECK-LABEL: testmhhs:
-; CHECK:       fcvtas  x0, h0
-; CHECK:       ret
 define i16 @testmhhs(half %x) {
+; CHECK-NOFP16-LABEL: testmhhs:
+; CHECK-NOFP16:       // %bb.0: // %entry
+; CHECK-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-NOFP16-NEXT:    fcvtas x0, s0
+; CHECK-NOFP16-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NOFP16-NEXT:    ret
+;
+; CHECK-FP16-LABEL: testmhhs:
+; CHECK-FP16:       // %bb.0: // %entry
+; CHECK-FP16-NEXT:    fcvtas x0, h0
+; CHECK-FP16-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-FP16-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.lround.i64.f16(half %x)
   %conv = trunc i64 %0 to i16
   ret i16 %conv
 }
 
-; CHECK-LABEL: testmhws:
-; CHECK:       fcvtas  x0, h0
-; CHECK:       ret
 define i32 @testmhws(half %x) {
+; CHECK-NOFP16-LABEL: testmhws:
+; CHECK-NOFP16:       // %bb.0: // %entry
+; CHECK-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-NOFP16-NEXT:    fcvtas x0, s0
+; CHECK-NOFP16-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NOFP16-NEXT:    ret
+;
+; CHECK-FP16-LABEL: testmhws:
+; CHECK-FP16:       // %bb.0: // %entry
+; CHECK-FP16-NEXT:    fcvtas x0, h0
+; CHECK-FP16-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-FP16-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.lround.i64.f16(half %x)
   %conv = trunc i64 %0 to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: testmhxs:
-; CHECK:       fcvtas  x0, h0
-; CHECK-NEXT:  ret
 define i64 @testmhxs(half %x) {
+; CHECK-NOFP16-LABEL: testmhxs:
+; CHECK-NOFP16:       // %bb.0: // %entry
+; CHECK-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-NOFP16-NEXT:    fcvtas x0, s0
+; CHECK-NOFP16-NEXT:    ret
+;
+; CHECK-FP16-LABEL: testmhxs:
+; CHECK-FP16:       // %bb.0: // %entry
+; CHECK-FP16-NEXT:    fcvtas x0, h0
+; CHECK-FP16-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.lround.i64.f16(half %x)
   ret i64 %0
</pre>
</details>


https://github.com/llvm/llvm-project/pull/66174