[llvm] [AArch64] Add LRINT/LLRINT/LROUND/LLROUND FP16 lowering without fullfp16 (PR #66174)

Tue Sep 12 23:57:59 PDT 2023

https://github.com/davemgreen created https://github.com/llvm/llvm-project/pull/66174:

We apparently somehow had lowering for the STRICT nodes without any handling for the normal operations. This makes sure we support the LRINT and LROUND intrinsics for fp16 when +fullfp16 is not present.

>From 3d3e17ccd7d91ba9a866483b56367913d1a162ed Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Wed, 13 Sep 2023 07:56:55 +0100
Subject: [PATCH] [AArch64] Add LRINT/LLRINT/LROUND/LLROUND FP16 lowering
 without fullfp16.

We apparently somehow had lowering for the STRICT nodes without any handling
for the normal operations. This makes sure we support the LRINT and LROUND
intrinsics for fp16 when +fullfp16 is not present.
---
 .../Target/AArch64/AArch64ISelLowering.cpp    | 13 ++++-
 llvm/test/CodeGen/AArch64/llrint-conv-fp16.ll | 56 ++++++++++++++-----
 .../test/CodeGen/AArch64/llround-conv-fp16.ll | 47 ++++++++++++----
 llvm/test/CodeGen/AArch64/lrint-conv-fp16.ll  | 56 ++++++++++++++-----
 llvm/test/CodeGen/AArch64/lround-conv-fp16.ll | 47 ++++++++++++----
 5 files changed, 172 insertions(+), 47 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index abe100335c23172..1adf3283ac6286f 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -707,7 +707,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
 
     // Round-to-integer need custom lowering for fp16, as Promote doesn't work
     // because the result type is integer.
-    for (auto Op : {ISD::STRICT_LROUND, ISD::STRICT_LLROUND, ISD::STRICT_LRINT,
+    for (auto Op : {ISD::LROUND, ISD::LLROUND, ISD::LRINT, ISD::LLRINT,
+                    ISD::STRICT_LROUND, ISD::STRICT_LLROUND, ISD::STRICT_LRINT,
                     ISD::STRICT_LLRINT})
       setOperationAction(Op, MVT::f16, Custom);
 
@@ -6183,6 +6184,16 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
     return LowerVECTOR_DEINTERLEAVE(Op, DAG);
   case ISD::VECTOR_INTERLEAVE:
     return LowerVECTOR_INTERLEAVE(Op, DAG);
+  case ISD::LROUND:
+  case ISD::LLROUND:
+  case ISD::LRINT:
+  case ISD::LLRINT: {
+    assert(Op.getOperand(0).getValueType() == MVT::f16 &&
+           "Expected custom lowering of rounding operations only for f16");
+    SDLoc DL(Op);
+    SDValue Ext = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
+    return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), Ext);
+  }
   case ISD::STRICT_LROUND:
   case ISD::STRICT_LLROUND:
   case ISD::STRICT_LRINT:
diff --git a/llvm/test/CodeGen/AArch64/llrint-conv-fp16.ll b/llvm/test/CodeGen/AArch64/llrint-conv-fp16.ll
index 366c337b59f6e32..1adbbab76abf52c 100644
--- a/llvm/test/CodeGen/AArch64/llrint-conv-fp16.ll
+++ b/llvm/test/CodeGen/AArch64/llrint-conv-fp16.ll
@@ -1,32 +1,62 @@
-; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK-NOFP16
+; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK-FP16
 
-; CHECK-LABEL: testmhhs:
-; CHECK:       frintx  h0, h0
-; CHECK-NEXT:  fcvtzs  x0, h0
-; CHECK:       ret
 define i16 @testmhhs(half %x) {
+; CHECK-NOFP16-LABEL: testmhhs:
+; CHECK-NOFP16:       // %bb.0: // %entry
+; CHECK-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-NOFP16-NEXT:    frintx s0, s0
+; CHECK-NOFP16-NEXT:    fcvtzs x0, s0
+; CHECK-NOFP16-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NOFP16-NEXT:    ret
+;
+; CHECK-FP16-LABEL: testmhhs:
+; CHECK-FP16:       // %bb.0: // %entry
+; CHECK-FP16-NEXT:    frintx h0, h0
+; CHECK-FP16-NEXT:    fcvtzs x0, h0
+; CHECK-FP16-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-FP16-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.llrint.i64.f16(half %x)
   %conv = trunc i64 %0 to i16
   ret i16 %conv
 }
 
-; CHECK-LABEL: testmhws:
-; CHECK:       frintx  h0, h0
-; CHECK-NEXT:  fcvtzs  x0, h0
-; CHECK:       ret
 define i32 @testmhws(half %x) {
+; CHECK-NOFP16-LABEL: testmhws:
+; CHECK-NOFP16:       // %bb.0: // %entry
+; CHECK-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-NOFP16-NEXT:    frintx s0, s0
+; CHECK-NOFP16-NEXT:    fcvtzs x0, s0
+; CHECK-NOFP16-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NOFP16-NEXT:    ret
+;
+; CHECK-FP16-LABEL: testmhws:
+; CHECK-FP16:       // %bb.0: // %entry
+; CHECK-FP16-NEXT:    frintx h0, h0
+; CHECK-FP16-NEXT:    fcvtzs x0, h0
+; CHECK-FP16-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-FP16-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.llrint.i64.f16(half %x)
   %conv = trunc i64 %0 to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: testmhxs:
-; CHECK:       frintx  h0, h0
-; CHECK-NEXT:  fcvtzs  x0, h0
-; CHECK:       ret
 define i64 @testmhxs(half %x) {
+; CHECK-NOFP16-LABEL: testmhxs:
+; CHECK-NOFP16:       // %bb.0: // %entry
+; CHECK-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-NOFP16-NEXT:    frintx s0, s0
+; CHECK-NOFP16-NEXT:    fcvtzs x0, s0
+; CHECK-NOFP16-NEXT:    ret
+;
+; CHECK-FP16-LABEL: testmhxs:
+; CHECK-FP16:       // %bb.0: // %entry
+; CHECK-FP16-NEXT:    frintx h0, h0
+; CHECK-FP16-NEXT:    fcvtzs x0, h0
+; CHECK-FP16-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.llrint.i64.f16(half %x)
   ret i64 %0
diff --git a/llvm/test/CodeGen/AArch64/llround-conv-fp16.ll b/llvm/test/CodeGen/AArch64/llround-conv-fp16.ll
index 5c914c093610198..4bf65e7d6fd088c 100644
--- a/llvm/test/CodeGen/AArch64/llround-conv-fp16.ll
+++ b/llvm/test/CodeGen/AArch64/llround-conv-fp16.ll
@@ -1,29 +1,56 @@
-; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK-NOFP16
+; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK-FP16
 
-; CHECK-LABEL: testmhhs:
-; CHECK:       fcvtas  x0, h0
-; CHECK:       ret
 define i16 @testmhhs(half %x) {
+; CHECK-NOFP16-LABEL: testmhhs:
+; CHECK-NOFP16:       // %bb.0: // %entry
+; CHECK-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-NOFP16-NEXT:    fcvtas x0, s0
+; CHECK-NOFP16-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NOFP16-NEXT:    ret
+;
+; CHECK-FP16-LABEL: testmhhs:
+; CHECK-FP16:       // %bb.0: // %entry
+; CHECK-FP16-NEXT:    fcvtas x0, h0
+; CHECK-FP16-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-FP16-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.llround.i64.f16(half %x)
   %conv = trunc i64 %0 to i16
   ret i16 %conv
 }
 
-; CHECK-LABEL: testmhws:
-; CHECK:       fcvtas  x0, h0
-; CHECK:       ret
 define i32 @testmhws(half %x) {
+; CHECK-NOFP16-LABEL: testmhws:
+; CHECK-NOFP16:       // %bb.0: // %entry
+; CHECK-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-NOFP16-NEXT:    fcvtas x0, s0
+; CHECK-NOFP16-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NOFP16-NEXT:    ret
+;
+; CHECK-FP16-LABEL: testmhws:
+; CHECK-FP16:       // %bb.0: // %entry
+; CHECK-FP16-NEXT:    fcvtas x0, h0
+; CHECK-FP16-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-FP16-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.llround.i64.f16(half %x)
   %conv = trunc i64 %0 to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: testmhxs:
-; CHECK:       fcvtas  x0, h0
-; CHECK-NEXT:  ret
 define i64 @testmhxs(half %x) {
+; CHECK-NOFP16-LABEL: testmhxs:
+; CHECK-NOFP16:       // %bb.0: // %entry
+; CHECK-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-NOFP16-NEXT:    fcvtas x0, s0
+; CHECK-NOFP16-NEXT:    ret
+;
+; CHECK-FP16-LABEL: testmhxs:
+; CHECK-FP16:       // %bb.0: // %entry
+; CHECK-FP16-NEXT:    fcvtas x0, h0
+; CHECK-FP16-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.llround.i64.f16(half %x)
   ret i64 %0
diff --git a/llvm/test/CodeGen/AArch64/lrint-conv-fp16.ll b/llvm/test/CodeGen/AArch64/lrint-conv-fp16.ll
index d812e2f585bce4f..7557ceac1212f78 100644
--- a/llvm/test/CodeGen/AArch64/lrint-conv-fp16.ll
+++ b/llvm/test/CodeGen/AArch64/lrint-conv-fp16.ll
@@ -1,32 +1,62 @@
-; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK-NOFP16
+; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK-FP16
 
-; CHECK-LABEL: testmhhs:
-; CHECK:       frintx  h0, h0
-; CHECK-NEXT:  fcvtzs  x0, h0
-; CHECK:       ret
 define i16 @testmhhs(half %x) {
+; CHECK-NOFP16-LABEL: testmhhs:
+; CHECK-NOFP16:       // %bb.0: // %entry
+; CHECK-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-NOFP16-NEXT:    frintx s0, s0
+; CHECK-NOFP16-NEXT:    fcvtzs x0, s0
+; CHECK-NOFP16-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NOFP16-NEXT:    ret
+;
+; CHECK-FP16-LABEL: testmhhs:
+; CHECK-FP16:       // %bb.0: // %entry
+; CHECK-FP16-NEXT:    frintx h0, h0
+; CHECK-FP16-NEXT:    fcvtzs x0, h0
+; CHECK-FP16-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-FP16-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.lrint.i64.f16(half %x)
   %conv = trunc i64 %0 to i16
   ret i16 %conv
 }
 
-; CHECK-LABEL: testmhws:
-; CHECK:       frintx  h0, h0
-; CHECK-NEXT:  fcvtzs  x0, h0
-; CHECK:       ret
 define i32 @testmhws(half %x) {
+; CHECK-NOFP16-LABEL: testmhws:
+; CHECK-NOFP16:       // %bb.0: // %entry
+; CHECK-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-NOFP16-NEXT:    frintx s0, s0
+; CHECK-NOFP16-NEXT:    fcvtzs x0, s0
+; CHECK-NOFP16-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NOFP16-NEXT:    ret
+;
+; CHECK-FP16-LABEL: testmhws:
+; CHECK-FP16:       // %bb.0: // %entry
+; CHECK-FP16-NEXT:    frintx h0, h0
+; CHECK-FP16-NEXT:    fcvtzs x0, h0
+; CHECK-FP16-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-FP16-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.lrint.i64.f16(half %x)
   %conv = trunc i64 %0 to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: testmhxs:
-; CHECK:       frintx  h0, h0
-; CHECK-NEXT:  fcvtzs  x0, h0
-; CHECK:       ret
 define i64 @testmhxs(half %x) {
+; CHECK-NOFP16-LABEL: testmhxs:
+; CHECK-NOFP16:       // %bb.0: // %entry
+; CHECK-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-NOFP16-NEXT:    frintx s0, s0
+; CHECK-NOFP16-NEXT:    fcvtzs x0, s0
+; CHECK-NOFP16-NEXT:    ret
+;
+; CHECK-FP16-LABEL: testmhxs:
+; CHECK-FP16:       // %bb.0: // %entry
+; CHECK-FP16-NEXT:    frintx h0, h0
+; CHECK-FP16-NEXT:    fcvtzs x0, h0
+; CHECK-FP16-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.lrint.i64.f16(half %x)
   ret i64 %0
diff --git a/llvm/test/CodeGen/AArch64/lround-conv-fp16.ll b/llvm/test/CodeGen/AArch64/lround-conv-fp16.ll
index cf81047f65ec923..bf78fd456eac0ed 100644
--- a/llvm/test/CodeGen/AArch64/lround-conv-fp16.ll
+++ b/llvm/test/CodeGen/AArch64/lround-conv-fp16.ll
@@ -1,29 +1,56 @@
-; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK-NOFP16
+; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK-FP16
 
-; CHECK-LABEL: testmhhs:
-; CHECK:       fcvtas  x0, h0
-; CHECK:       ret
 define i16 @testmhhs(half %x) {
+; CHECK-NOFP16-LABEL: testmhhs:
+; CHECK-NOFP16:       // %bb.0: // %entry
+; CHECK-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-NOFP16-NEXT:    fcvtas x0, s0
+; CHECK-NOFP16-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NOFP16-NEXT:    ret
+;
+; CHECK-FP16-LABEL: testmhhs:
+; CHECK-FP16:       // %bb.0: // %entry
+; CHECK-FP16-NEXT:    fcvtas x0, h0
+; CHECK-FP16-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-FP16-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.lround.i64.f16(half %x)
   %conv = trunc i64 %0 to i16
   ret i16 %conv
 }
 
-; CHECK-LABEL: testmhws:
-; CHECK:       fcvtas  x0, h0
-; CHECK:       ret
 define i32 @testmhws(half %x) {
+; CHECK-NOFP16-LABEL: testmhws:
+; CHECK-NOFP16:       // %bb.0: // %entry
+; CHECK-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-NOFP16-NEXT:    fcvtas x0, s0
+; CHECK-NOFP16-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NOFP16-NEXT:    ret
+;
+; CHECK-FP16-LABEL: testmhws:
+; CHECK-FP16:       // %bb.0: // %entry
+; CHECK-FP16-NEXT:    fcvtas x0, h0
+; CHECK-FP16-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-FP16-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.lround.i64.f16(half %x)
   %conv = trunc i64 %0 to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: testmhxs:
-; CHECK:       fcvtas  x0, h0
-; CHECK-NEXT:  ret
 define i64 @testmhxs(half %x) {
+; CHECK-NOFP16-LABEL: testmhxs:
+; CHECK-NOFP16:       // %bb.0: // %entry
+; CHECK-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-NOFP16-NEXT:    fcvtas x0, s0
+; CHECK-NOFP16-NEXT:    ret
+;
+; CHECK-FP16-LABEL: testmhxs:
+; CHECK-FP16:       // %bb.0: // %entry
+; CHECK-FP16-NEXT:    fcvtas x0, h0
+; CHECK-FP16-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.lround.i64.f16(half %x)
   ret i64 %0