[llvm] r350825 - [AArch64] Fix operation actions for FP16 vector intrinsics
Bryan Chan via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 10 07:02:38 PST 2019
Author: bryanpkc
Date: Thu Jan 10 07:02:37 2019
New Revision: 350825
URL: http://llvm.org/viewvc/llvm-project?rev=350825&view=rev
Log:
[AArch64] Fix operation actions for FP16 vector intrinsics
Summary:
This patch changes the legalization action for some half-precision
floating-point vector intrinsics (FSIN, FLOG, etc.) from Promote to Expand. These ops
are not supported in hardware for half-precision vectors, but promotion is
not always possible (for v8f16 operands). Changing the action to Expand fixes
an assertion failure in the legalizer when the frontend produces such ops.
In addition, a quick microbenchmark shows that, in the v4f16 case,
expanding introduces fewer spills and is therefore slightly faster than
promoting.
Reviewers: t.p.northover, SjoerdMeijer
Reviewed By: SjoerdMeijer
Subscribers: javed.absar, kristof.beyls, llvm-commits
Differential Revision: https://reviews.llvm.org/D56296
Modified:
llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/trunk/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll
Modified: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp?rev=350825&r1=350824&r2=350825&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp Thu Jan 10 07:02:37 2019
@@ -333,36 +333,38 @@ AArch64TargetLowering::AArch64TargetLowe
setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote);
setOperationAction(ISD::FREM, MVT::f16, Promote);
- setOperationAction(ISD::FREM, MVT::v4f16, Promote);
- setOperationAction(ISD::FREM, MVT::v8f16, Promote);
+ setOperationAction(ISD::FREM, MVT::v4f16, Expand);
+ setOperationAction(ISD::FREM, MVT::v8f16, Expand);
setOperationAction(ISD::FPOW, MVT::f16, Promote);
- setOperationAction(ISD::FPOW, MVT::v4f16, Promote);
- setOperationAction(ISD::FPOW, MVT::v8f16, Promote);
+ setOperationAction(ISD::FPOW, MVT::v4f16, Expand);
+ setOperationAction(ISD::FPOW, MVT::v8f16, Expand);
setOperationAction(ISD::FPOWI, MVT::f16, Promote);
+ setOperationAction(ISD::FPOWI, MVT::v4f16, Expand);
+ setOperationAction(ISD::FPOWI, MVT::v8f16, Expand);
setOperationAction(ISD::FCOS, MVT::f16, Promote);
- setOperationAction(ISD::FCOS, MVT::v4f16, Promote);
- setOperationAction(ISD::FCOS, MVT::v8f16, Promote);
+ setOperationAction(ISD::FCOS, MVT::v4f16, Expand);
+ setOperationAction(ISD::FCOS, MVT::v8f16, Expand);
setOperationAction(ISD::FSIN, MVT::f16, Promote);
- setOperationAction(ISD::FSIN, MVT::v4f16, Promote);
- setOperationAction(ISD::FSIN, MVT::v8f16, Promote);
+ setOperationAction(ISD::FSIN, MVT::v4f16, Expand);
+ setOperationAction(ISD::FSIN, MVT::v8f16, Expand);
setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
- setOperationAction(ISD::FSINCOS, MVT::v4f16, Promote);
- setOperationAction(ISD::FSINCOS, MVT::v8f16, Promote);
+ setOperationAction(ISD::FSINCOS, MVT::v4f16, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::v8f16, Expand);
setOperationAction(ISD::FEXP, MVT::f16, Promote);
- setOperationAction(ISD::FEXP, MVT::v4f16, Promote);
- setOperationAction(ISD::FEXP, MVT::v8f16, Promote);
+ setOperationAction(ISD::FEXP, MVT::v4f16, Expand);
+ setOperationAction(ISD::FEXP, MVT::v8f16, Expand);
setOperationAction(ISD::FEXP2, MVT::f16, Promote);
- setOperationAction(ISD::FEXP2, MVT::v4f16, Promote);
- setOperationAction(ISD::FEXP2, MVT::v8f16, Promote);
+ setOperationAction(ISD::FEXP2, MVT::v4f16, Expand);
+ setOperationAction(ISD::FEXP2, MVT::v8f16, Expand);
setOperationAction(ISD::FLOG, MVT::f16, Promote);
- setOperationAction(ISD::FLOG, MVT::v4f16, Promote);
- setOperationAction(ISD::FLOG, MVT::v8f16, Promote);
+ setOperationAction(ISD::FLOG, MVT::v4f16, Expand);
+ setOperationAction(ISD::FLOG, MVT::v8f16, Expand);
setOperationAction(ISD::FLOG2, MVT::f16, Promote);
- setOperationAction(ISD::FLOG2, MVT::v4f16, Promote);
- setOperationAction(ISD::FLOG2, MVT::v8f16, Promote);
+ setOperationAction(ISD::FLOG2, MVT::v4f16, Expand);
+ setOperationAction(ISD::FLOG2, MVT::v8f16, Expand);
setOperationAction(ISD::FLOG10, MVT::f16, Promote);
- setOperationAction(ISD::FLOG10, MVT::v4f16, Promote);
- setOperationAction(ISD::FLOG10, MVT::v8f16, Promote);
+ setOperationAction(ISD::FLOG10, MVT::v4f16, Expand);
+ setOperationAction(ISD::FLOG10, MVT::v8f16, Expand);
if (!Subtarget->hasFullFP16()) {
setOperationAction(ISD::SELECT, MVT::f16, Promote);
Modified: llvm/trunk/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll?rev=350825&r1=350824&r2=350825&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll Thu Jan 10 07:02:37 2019
@@ -1,42 +1,332 @@
-; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -mattr=-fullfp16 | FileCheck %s
-; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -mattr=+fullfp16 | FileCheck %s --check-prefix=CHECK-FP16
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -mattr=-fullfp16 \
+; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOFP16
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -mattr=+fullfp16 \
+; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP16
-;;; Float vectors
+;;; Half vectors
-%v2f32 = type <2 x float>
%v4f16 = type <4 x half>
-%v8f16 = type <8 x half>
-; CHECK-LABEL: test_v2f32.sqrt:
-define %v2f32 @test_v2f32.sqrt(%v2f32 %a) {
- ; CHECK: fsqrt.2s
- %1 = call %v2f32 @llvm.sqrt.v2f32(%v2f32 %a)
- ret %v2f32 %1
-}
define %v4f16 @test_v4f16.sqrt(%v4f16 %a) {
-; CHECK-LABEL: test_v4f16.sqrt:
-; CHECK: fsqrt s{{.}}, s{{.}}
-; CHECK: fsqrt s{{.}}, s{{.}}
-; CHECK: fsqrt s{{.}}, s{{.}}
-; CHECK: fsqrt s{{.}}, s{{.}}
-
-; CHECK-FP16-LABEL: test_v4f16.sqrt:
-; CHECK-FP16-NOT: fcvt
-; CHECK-FP16: fsqrt.4h
-; CHECK-FP16-NEXT: ret
+ ; CHECK-LABEL: test_v4f16.sqrt:
+ ; CHECK-NOFP16-COUNT-4: fsqrt s{{[0-9]+}}, s{{[0-9]+}}
+ ; CHECK-FP16-NOT: fcvt
+ ; CHECK-FP16: fsqrt.4h
+ ; CHECK-FP16-NEXT: ret
%1 = call %v4f16 @llvm.sqrt.v4f16(%v4f16 %a)
ret %v4f16 %1
}
-define %v8f16 @test_v8f16.sqrt(%v8f16 %a) {
-; Filechecks are unwieldy with 16 fcvt and 8 fsqrt tests, so skipped for -fullfp16.
+define %v4f16 @test_v4f16.powi(%v4f16 %a, i32 %b) {
+ ; This operation is expanded, whether with or without +fullfp16.
+ ; CHECK-LABEL: test_v4f16.powi:
+ ; CHECK-COUNT-4: bl __powi
+ %1 = call %v4f16 @llvm.powi.v4f16(%v4f16 %a, i32 %b)
+ ret %v4f16 %1
+}
+define %v4f16 @test_v4f16.sin(%v4f16 %a) {
+ ; This operation is expanded, whether with or without +fullfp16.
+ ; CHECK-LABEL: test_v4f16.sin:
+ ; CHECK-COUNT-4: bl sinf
+ %1 = call %v4f16 @llvm.sin.v4f16(%v4f16 %a)
+ ret %v4f16 %1
+}
+define %v4f16 @test_v4f16.cos(%v4f16 %a) {
+ ; This operation is expanded, whether with or without +fullfp16.
+ ; CHECK-LABEL: test_v4f16.cos:
+ ; CHECK-COUNT-4: bl cosf
+ %1 = call %v4f16 @llvm.cos.v4f16(%v4f16 %a)
+ ret %v4f16 %1
+}
+define %v4f16 @test_v4f16.pow(%v4f16 %a, %v4f16 %b) {
+ ; This operation is expanded, whether with or without +fullfp16.
+ ; CHECK-LABEL: test_v4f16.pow:
+ ; CHECK-COUNT-4: bl pow
+ %1 = call %v4f16 @llvm.pow.v4f16(%v4f16 %a, %v4f16 %b)
+ ret %v4f16 %1
+}
+define %v4f16 @test_v4f16.exp(%v4f16 %a) {
+ ; This operation is expanded, whether with or without +fullfp16.
+ ; CHECK-LABEL: test_v4f16.exp:
+ ; CHECK-COUNT-4: bl exp
+ %1 = call %v4f16 @llvm.exp.v4f16(%v4f16 %a)
+ ret %v4f16 %1
+}
+define %v4f16 @test_v4f16.exp2(%v4f16 %a) {
+ ; This operation is expanded, whether with or without +fullfp16.
+ ; CHECK-LABEL: test_v4f16.exp2:
+ ; CHECK-COUNT-4: bl exp2
+ %1 = call %v4f16 @llvm.exp2.v4f16(%v4f16 %a)
+ ret %v4f16 %1
+}
+define %v4f16 @test_v4f16.log(%v4f16 %a) {
+ ; This operation is expanded, whether with or without +fullfp16.
+ ; CHECK-LABEL: test_v4f16.log:
+ ; CHECK-COUNT-4: bl log
+ %1 = call %v4f16 @llvm.log.v4f16(%v4f16 %a)
+ ret %v4f16 %1
+}
+define %v4f16 @test_v4f16.log10(%v4f16 %a) {
+ ; This operation is expanded, whether with or without +fullfp16.
+ ; CHECK-LABEL: test_v4f16.log10:
+ ; CHECK-COUNT-4: bl log10
+ %1 = call %v4f16 @llvm.log10.v4f16(%v4f16 %a)
+ ret %v4f16 %1
+}
+define %v4f16 @test_v4f16.log2(%v4f16 %a) {
+ ; This operation is expanded, whether with or without +fullfp16.
+ ; CHECK-LABEL: test_v4f16.log2:
+ ; CHECK-COUNT-4: bl log2
+ %1 = call %v4f16 @llvm.log2.v4f16(%v4f16 %a)
+ ret %v4f16 %1
+}
+define %v4f16 @test_v4f16.fma(%v4f16 %a, %v4f16 %b, %v4f16 %c) {
+ ; CHECK-LABEL: test_v4f16.fma:
+ ; CHECK-NOFP16-COUNT-4: fmadd s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
+ ; CHECK-FP16-NOT: fcvt
+ ; CHECK-FP16: fmla.4h
+ %1 = call %v4f16 @llvm.fma.v4f16(%v4f16 %a, %v4f16 %b, %v4f16 %c)
+ ret %v4f16 %1
+}
+define %v4f16 @test_v4f16.fabs(%v4f16 %a) {
+ ; CHECK-LABEL: test_v4f16.fabs:
+ ; CHECK-NOFP16-COUNT-4: fabs s{{[0-9]+}}, s{{[0-9]+}}
+ ; CHECK-FP16-NOT: fcvt
+ ; CHECK-FP16: fabs.4h
+ ; CHECK-FP16-NEXT: ret
+ %1 = call %v4f16 @llvm.fabs.v4f16(%v4f16 %a)
+ ret %v4f16 %1
+}
+define %v4f16 @test_v4f16.floor(%v4f16 %a) {
+ ; CHECK-LABEL: test_v4f16.floor:
+ ; CHECK-NOFP16-COUNT-4: frintm s{{[0-9]+}}, s{{[0-9]+}}
+ ; CHECK-FP16-NOT: fcvt
+ ; CHECK-FP16: frintm.4h
+ ; CHECK-FP16-NEXT: ret
+ %1 = call %v4f16 @llvm.floor.v4f16(%v4f16 %a)
+ ret %v4f16 %1
+}
+define %v4f16 @test_v4f16.ceil(%v4f16 %a) {
+ ; CHECK-LABEL: test_v4f16.ceil:
+ ; CHECK-NOFP16-COUNT-4: frintp s{{[0-9]+}}, s{{[0-9]+}}
+ ; CHECK-FP16-NOT: fcvt
+ ; CHECK-FP16: frintp.4h
+ ; CHECK-FP16-NEXT: ret
+ %1 = call %v4f16 @llvm.ceil.v4f16(%v4f16 %a)
+ ret %v4f16 %1
+}
+define %v4f16 @test_v4f16.trunc(%v4f16 %a) {
+ ; CHECK-LABEL: test_v4f16.trunc:
+ ; CHECK-NOFP16-COUNT-4: frintz s{{[0-9]+}}, s{{[0-9]+}}
+ ; CHECK-FP16-NOT: fcvt
+ ; CHECK-FP16: frintz.4h
+ ; CHECK-FP16-NEXT: ret
+ %1 = call %v4f16 @llvm.trunc.v4f16(%v4f16 %a)
+ ret %v4f16 %1
+}
+define %v4f16 @test_v4f16.rint(%v4f16 %a) {
+ ; CHECK-LABEL: test_v4f16.rint:
+ ; CHECK-NOFP16-COUNT-4: frintx s{{[0-9]+}}, s{{[0-9]+}}
+ ; CHECK-FP16-NOT: fcvt
+ ; CHECK-FP16: frintx.4h
+ ; CHECK-FP16-NEXT: ret
+ %1 = call %v4f16 @llvm.rint.v4f16(%v4f16 %a)
+ ret %v4f16 %1
+}
+define %v4f16 @test_v4f16.nearbyint(%v4f16 %a) {
+ ; CHECK-LABEL: test_v4f16.nearbyint:
+ ; CHECK-NOFP16-COUNT-4: frinti s{{[0-9]+}}, s{{[0-9]+}}
+ ; CHECK-FP16-NOT: fcvt
+ ; CHECK-FP16: frinti.4h
+ ; CHECK-FP16-NEXT: ret
+ %1 = call %v4f16 @llvm.nearbyint.v4f16(%v4f16 %a)
+ ret %v4f16 %1
+}
-; CHECK-FP16-LABEL: test_v8f16.sqrt:
-; CHECK-FP16-NOT: fcvt
-; CHECK-FP16: fsqrt.8h
-; CHECK-FP16-NEXT: ret
+declare %v4f16 @llvm.sqrt.v4f16(%v4f16) #0
+declare %v4f16 @llvm.powi.v4f16(%v4f16, i32) #0
+declare %v4f16 @llvm.sin.v4f16(%v4f16) #0
+declare %v4f16 @llvm.cos.v4f16(%v4f16) #0
+declare %v4f16 @llvm.pow.v4f16(%v4f16, %v4f16) #0
+declare %v4f16 @llvm.exp.v4f16(%v4f16) #0
+declare %v4f16 @llvm.exp2.v4f16(%v4f16) #0
+declare %v4f16 @llvm.log.v4f16(%v4f16) #0
+declare %v4f16 @llvm.log10.v4f16(%v4f16) #0
+declare %v4f16 @llvm.log2.v4f16(%v4f16) #0
+declare %v4f16 @llvm.fma.v4f16(%v4f16, %v4f16, %v4f16) #0
+declare %v4f16 @llvm.fabs.v4f16(%v4f16) #0
+declare %v4f16 @llvm.floor.v4f16(%v4f16) #0
+declare %v4f16 @llvm.ceil.v4f16(%v4f16) #0
+declare %v4f16 @llvm.trunc.v4f16(%v4f16) #0
+declare %v4f16 @llvm.rint.v4f16(%v4f16) #0
+declare %v4f16 @llvm.nearbyint.v4f16(%v4f16) #0
+
+;;;
+
+%v8f16 = type <8 x half>
+
+define %v8f16 @test_v8f16.sqrt(%v8f16 %a) {
+ ; CHECK-LABEL: test_v8f16.sqrt:
+ ; CHECK-NOFP16-COUNT-8: fsqrt s{{[0-9]+}}, s{{[0-9]+}}
+ ; CHECK-FP16-NOT: fcvt
+ ; CHECK-FP16: fsqrt.8h
+ ; CHECK-FP16-NEXT: ret
%1 = call %v8f16 @llvm.sqrt.v8f16(%v8f16 %a)
ret %v8f16 %1
}
+define %v8f16 @test_v8f16.powi(%v8f16 %a, i32 %b) {
+ ; This operation is expanded, whether with or without +fullfp16.
+ ; CHECK-LABEL: test_v8f16.powi:
+ ; CHECK-COUNT-8: bl __powi
+ %1 = call %v8f16 @llvm.powi.v8f16(%v8f16 %a, i32 %b)
+ ret %v8f16 %1
+}
+define %v8f16 @test_v8f16.sin(%v8f16 %a) {
+ ; This operation is expanded, whether with or without +fullfp16.
+ ; CHECK-LABEL: test_v8f16.sin:
+ ; CHECK-COUNT-8: bl sinf
+ %1 = call %v8f16 @llvm.sin.v8f16(%v8f16 %a)
+ ret %v8f16 %1
+}
+define %v8f16 @test_v8f16.cos(%v8f16 %a) {
+ ; This operation is expanded, whether with or without +fullfp16.
+ ; CHECK-LABEL: test_v8f16.cos:
+ ; CHECK-COUNT-8: bl cosf
+ %1 = call %v8f16 @llvm.cos.v8f16(%v8f16 %a)
+ ret %v8f16 %1
+}
+define %v8f16 @test_v8f16.pow(%v8f16 %a, %v8f16 %b) {
+ ; This operation is expanded, whether with or without +fullfp16.
+ ; CHECK-LABEL: test_v8f16.pow:
+ ; CHECK-COUNT-8: bl pow
+ %1 = call %v8f16 @llvm.pow.v8f16(%v8f16 %a, %v8f16 %b)
+ ret %v8f16 %1
+}
+define %v8f16 @test_v8f16.exp(%v8f16 %a) {
+ ; This operation is expanded, whether with or without +fullfp16.
+ ; CHECK-LABEL: test_v8f16.exp:
+ ; CHECK-COUNT-8: bl exp
+ %1 = call %v8f16 @llvm.exp.v8f16(%v8f16 %a)
+ ret %v8f16 %1
+}
+define %v8f16 @test_v8f16.exp2(%v8f16 %a) {
+ ; This operation is expanded, whether with or without +fullfp16.
+ ; CHECK-LABEL: test_v8f16.exp2:
+ ; CHECK-COUNT-8: bl exp2
+ %1 = call %v8f16 @llvm.exp2.v8f16(%v8f16 %a)
+ ret %v8f16 %1
+}
+define %v8f16 @test_v8f16.log(%v8f16 %a) {
+ ; This operation is expanded, whether with or without +fullfp16.
+ ; CHECK-LABEL: test_v8f16.log:
+ ; CHECK-COUNT-8: bl log
+ %1 = call %v8f16 @llvm.log.v8f16(%v8f16 %a)
+ ret %v8f16 %1
+}
+define %v8f16 @test_v8f16.log10(%v8f16 %a) {
+ ; This operation is expanded, whether with or without +fullfp16.
+ ; CHECK-LABEL: test_v8f16.log10:
+ ; CHECK-COUNT-8: bl log10
+ %1 = call %v8f16 @llvm.log10.v8f16(%v8f16 %a)
+ ret %v8f16 %1
+}
+define %v8f16 @test_v8f16.log2(%v8f16 %a) {
+ ; This operation is expanded, whether with or without +fullfp16.
+ ; CHECK-LABEL: test_v8f16.log2:
+ ; CHECK-COUNT-8: bl log2
+ %1 = call %v8f16 @llvm.log2.v8f16(%v8f16 %a)
+ ret %v8f16 %1
+}
+define %v8f16 @test_v8f16.fma(%v8f16 %a, %v8f16 %b, %v8f16 %c) {
+ ; CHECK-LABEL: test_v8f16.fma:
+ ; CHECK-NOFP16-COUNT-8: fmadd s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
+ ; CHECK-FP16-NOT: fcvt
+ ; CHECK-FP16: fmla.8h
+ %1 = call %v8f16 @llvm.fma.v8f16(%v8f16 %a, %v8f16 %b, %v8f16 %c)
+ ret %v8f16 %1
+}
+define %v8f16 @test_v8f16.fabs(%v8f16 %a) {
+ ; CHECK-LABEL: test_v8f16.fabs:
+ ; CHECK-NOFP16-COUNT-8: fabs s{{[0-9]+}}, s{{[0-9]+}}
+ ; CHECK-FP16-NOT: fcvt
+ ; CHECK-FP16: fabs.8h
+ ; CHECK-FP16-NEXT: ret
+ %1 = call %v8f16 @llvm.fabs.v8f16(%v8f16 %a)
+ ret %v8f16 %1
+}
+define %v8f16 @test_v8f16.floor(%v8f16 %a) {
+ ; CHECK-LABEL: test_v8f16.floor:
+ ; CHECK-NOFP16-COUNT-8: frintm s{{[0-9]+}}, s{{[0-9]+}}
+ ; CHECK-FP16-NOT: fcvt
+ ; CHECK-FP16: frintm.8h
+ ; CHECK-FP16-NEXT: ret
+ %1 = call %v8f16 @llvm.floor.v8f16(%v8f16 %a)
+ ret %v8f16 %1
+}
+define %v8f16 @test_v8f16.ceil(%v8f16 %a) {
+ ; CHECK-LABEL: test_v8f16.ceil:
+ ; CHECK-NOFP16-COUNT-8: frintp s{{[0-9]+}}, s{{[0-9]+}}
+ ; CHECK-FP16-NOT: fcvt
+ ; CHECK-FP16: frintp.8h
+ ; CHECK-FP16-NEXT: ret
+ %1 = call %v8f16 @llvm.ceil.v8f16(%v8f16 %a)
+ ret %v8f16 %1
+}
+define %v8f16 @test_v8f16.trunc(%v8f16 %a) {
+ ; CHECK-LABEL: test_v8f16.trunc:
+ ; CHECK-NOFP16-COUNT-8: frintz s{{[0-9]+}}, s{{[0-9]+}}
+ ; CHECK-FP16-NOT: fcvt
+ ; CHECK-FP16: frintz.8h
+ ; CHECK-FP16-NEXT: ret
+ %1 = call %v8f16 @llvm.trunc.v8f16(%v8f16 %a)
+ ret %v8f16 %1
+}
+define %v8f16 @test_v8f16.rint(%v8f16 %a) {
+ ; CHECK-LABEL: test_v8f16.rint:
+ ; CHECK-NOFP16-COUNT-8: frintx s{{[0-9]+}}, s{{[0-9]+}}
+ ; CHECK-FP16-NOT: fcvt
+ ; CHECK-FP16: frintx.8h
+ ; CHECK-FP16-NEXT: ret
+ %1 = call %v8f16 @llvm.rint.v8f16(%v8f16 %a)
+ ret %v8f16 %1
+}
+define %v8f16 @test_v8f16.nearbyint(%v8f16 %a) {
+ ; CHECK-LABEL: test_v8f16.nearbyint:
+ ; CHECK-NOFP16-COUNT-8: frinti s{{[0-9]+}}, s{{[0-9]+}}
+ ; CHECK-FP16-NOT: fcvt
+ ; CHECK-FP16: frinti.8h
+ ; CHECK-FP16-NEXT: ret
+ %1 = call %v8f16 @llvm.nearbyint.v8f16(%v8f16 %a)
+ ret %v8f16 %1
+}
+
+declare %v8f16 @llvm.sqrt.v8f16(%v8f16) #0
+declare %v8f16 @llvm.powi.v8f16(%v8f16, i32) #0
+declare %v8f16 @llvm.sin.v8f16(%v8f16) #0
+declare %v8f16 @llvm.cos.v8f16(%v8f16) #0
+declare %v8f16 @llvm.pow.v8f16(%v8f16, %v8f16) #0
+declare %v8f16 @llvm.exp.v8f16(%v8f16) #0
+declare %v8f16 @llvm.exp2.v8f16(%v8f16) #0
+declare %v8f16 @llvm.log.v8f16(%v8f16) #0
+declare %v8f16 @llvm.log10.v8f16(%v8f16) #0
+declare %v8f16 @llvm.log2.v8f16(%v8f16) #0
+declare %v8f16 @llvm.fma.v8f16(%v8f16, %v8f16, %v8f16) #0
+declare %v8f16 @llvm.fabs.v8f16(%v8f16) #0
+declare %v8f16 @llvm.floor.v8f16(%v8f16) #0
+declare %v8f16 @llvm.ceil.v8f16(%v8f16) #0
+declare %v8f16 @llvm.trunc.v8f16(%v8f16) #0
+declare %v8f16 @llvm.rint.v8f16(%v8f16) #0
+declare %v8f16 @llvm.nearbyint.v8f16(%v8f16) #0
+
+;;; Float vectors
+
+%v2f32 = type <2 x float>
+
+; CHECK-LABEL: test_v2f32.sqrt:
+define %v2f32 @test_v2f32.sqrt(%v2f32 %a) {
+ ; CHECK: fsqrt.2s
+ %1 = call %v2f32 @llvm.sqrt.v2f32(%v2f32 %a)
+ ret %v2f32 %1
+}
; CHECK: test_v2f32.powi:
define %v2f32 @test_v2f32.powi(%v2f32 %a, i32 %b) {
; CHECK: pow
@@ -97,211 +387,44 @@ define %v2f32 @test_v2f32.fma(%v2f32 %a,
%1 = call %v2f32 @llvm.fma.v2f32(%v2f32 %a, %v2f32 %b, %v2f32 %c)
ret %v2f32 %1
}
-define %v4f16 @test_v4f16.fma(%v4f16 %a, %v4f16 %b, %v4f16 %c) {
-; CHECK-LABEL: test_v4f16.fma:
-; CHECK: fmadd s{{.}}, s{{.}}, s{{.}}, s{{.}}
-; CHECK: fmadd s{{.}}, s{{.}}, s{{.}}, s{{.}}
-; CHECK: fmadd s{{.}}, s{{.}}, s{{.}}, s{{.}}
-; CHECK: fmadd s{{.}}, s{{.}}, s{{.}}, s{{.}}
-
-; CHECK-FP16-LABEL: test_v4f16.fma:
-; CHECK-FP16-NOT: fcvt
-; CHECK-FP16: fmla.4h
- %1 = call %v4f16 @llvm.fma.v4f16(%v4f16 %a, %v4f16 %b, %v4f16 %c)
- ret %v4f16 %1
-}
-define %v8f16 @test_v8f16.fma(%v8f16 %a, %v8f16 %b, %v8f16 %c) {
-; Filechecks are unwieldy with 16 fcvt and 8 fma tests, so skipped for -fullfp16.
-
-; CHECK-FP16-LABEL: test_v8f16.fma:
-; CHECK-FP16-NOT: fcvt
-; CHECK-FP16: fmla.8h
- %1 = call %v8f16 @llvm.fma.v8f16(%v8f16 %a, %v8f16 %b, %v8f16 %c)
- ret %v8f16 %1
-}
; CHECK-LABEL: test_v2f32.fabs:
define %v2f32 @test_v2f32.fabs(%v2f32 %a) {
; CHECK: fabs.2s
%1 = call %v2f32 @llvm.fabs.v2f32(%v2f32 %a)
ret %v2f32 %1
}
-define %v4f16 @test_v4f16.fabs(%v4f16 %a) {
-; CHECK-LABEL: test_v4f16.fabs:
-; CHECK: fabs s{{.}}, s{{.}}
-; CHECK: fabs s{{.}}, s{{.}}
-; CHECK: fabs s{{.}}, s{{.}}
-; CHECK: fabs s{{.}}, s{{.}}
-
-; CHECK-FP16-LABEL: test_v4f16.fabs:
-; CHECK-FP16-NOT: fcvt
-; CHECK-FP16: fabs.4h
-; CHECK-FP16-NEXT: ret
- %1 = call %v4f16 @llvm.fabs.v4f16(%v4f16 %a)
- ret %v4f16 %1
-}
-define %v8f16 @test_v8f16.fabs(%v8f16 %a) {
-; Filechecks are unwieldy with 16 fcvt and 8 fabs tests, so skipped for -fullfp16.
-
-; CHECK-FP16-LABEL: test_v8f16.fabs:
-; CHECK-FP16-NOT: fcvt
-; CHECK-FP16: fabs.8h
-; CHECK-FP16-NEXT: ret
- %1 = call %v8f16 @llvm.fabs.v8f16(%v8f16 %a)
- ret %v8f16 %1
-}
; CHECK-LABEL: test_v2f32.floor:
define %v2f32 @test_v2f32.floor(%v2f32 %a) {
; CHECK: frintm.2s
%1 = call %v2f32 @llvm.floor.v2f32(%v2f32 %a)
ret %v2f32 %1
}
-define %v4f16 @test_v4f16.floor(%v4f16 %a) {
-; CHECK-LABEL: test_v4f16.floor:
-; CHECK: frintm s{{.}}, s{{.}}
-; CHECK: frintm s{{.}}, s{{.}}
-; CHECK: frintm s{{.}}, s{{.}}
-; CHECK: frintm s{{.}}, s{{.}}
-
-; CHECK-FP16-LABEL: test_v4f16.floor:
-; CHECK-FP16-NOT: fcvt
-; CHECK-FP16: frintm.4h
-; CHECK-FP16-NEXT: ret
- %1 = call %v4f16 @llvm.floor.v4f16(%v4f16 %a)
- ret %v4f16 %1
-}
-define %v8f16 @test_v8f16.floor(%v8f16 %a) {
-; Filechecks are unwieldy with 16 fcvt and 8 frintm tests, so skipped for -fullfp16.
-
-; CHECK-FP16-LABEL: test_v8f16.floor:
-; CHECK-FP16-NOT: fcvt
-; CHECK-FP16: frintm.8h
-; CHECK-FP16-NEXT: ret
- %1 = call %v8f16 @llvm.floor.v8f16(%v8f16 %a)
- ret %v8f16 %1
-}
; CHECK-LABEL: test_v2f32.ceil:
define %v2f32 @test_v2f32.ceil(%v2f32 %a) {
; CHECK: frintp.2s
%1 = call %v2f32 @llvm.ceil.v2f32(%v2f32 %a)
ret %v2f32 %1
}
-define %v4f16 @test_v4f16.ceil(%v4f16 %a) {
-; CHECK-LABEL: test_v4f16.ceil:
-; CHECK: frintp s{{.}}, s{{.}}
-; CHECK: frintp s{{.}}, s{{.}}
-; CHECK: frintp s{{.}}, s{{.}}
-; CHECK: frintp s{{.}}, s{{.}}
-
-; CHECK-FP16-LABEL: test_v4f16.ceil:
-; CHECK-FP16-NOT: fcvt
-; CHECK-FP16: frintp.4h
-; CHECK-FP16-NEXT: ret
- %1 = call %v4f16 @llvm.ceil.v4f16(%v4f16 %a)
- ret %v4f16 %1
-}
-define %v8f16 @test_v8f16.ceil(%v8f16 %a) {
-; Filechecks are unwieldy with 16 fcvt and 8 frint tests, so skipped for -fullfp16.
-
-; CHECK-FP16-LABEL: test_v8f16.ceil:
-; CHECK-FP16-NOT: fcvt
-; CHECK-FP16: frintp.8h
-; CHECK-FP16-NEXT: ret
- %1 = call %v8f16 @llvm.ceil.v8f16(%v8f16 %a)
- ret %v8f16 %1
-}
; CHECK-LABEL: test_v2f32.trunc:
define %v2f32 @test_v2f32.trunc(%v2f32 %a) {
; CHECK: frintz.2s
%1 = call %v2f32 @llvm.trunc.v2f32(%v2f32 %a)
ret %v2f32 %1
}
-define %v4f16 @test_v4f16.trunc(%v4f16 %a) {
-; CHECK-LABEL: test_v4f16.trunc:
-; CHECK: frintz s{{.}}, s{{.}}
-; CHECK: frintz s{{.}}, s{{.}}
-; CHECK: frintz s{{.}}, s{{.}}
-; CHECK: frintz s{{.}}, s{{.}}
-
-; CHECK-FP16-LABEL: test_v4f16.trunc:
-; CHECK-FP16: frintz.4h
-; CHECK-FP16-NEXT: ret
- %1 = call %v4f16 @llvm.trunc.v4f16(%v4f16 %a)
- ret %v4f16 %1
-}
-define %v8f16 @test_v8f16.trunc(%v8f16 %a) {
-; Filechecks are unwieldy with 16 fcvt and 8 frint tests, so skipped for -fullfp16.
-
-; CHECK-FP16-LABEL: test_v8f16.trunc:
-; CHECK-FP16-NOT: fcvt
-; CHECK-FP16: frintz.8h
-; CHECK-FP16-NEXT: ret
- %1 = call %v8f16 @llvm.trunc.v8f16(%v8f16 %a)
- ret %v8f16 %1
-}
; CHECK-LABEL: test_v2f32.rint:
define %v2f32 @test_v2f32.rint(%v2f32 %a) {
; CHECK: frintx.2s
%1 = call %v2f32 @llvm.rint.v2f32(%v2f32 %a)
ret %v2f32 %1
}
-define %v4f16 @test_v4f16.rint(%v4f16 %a) {
-; CHECK-LABEL: test_v4f16.rint:
-; CHECK: frintx s{{.}}, s{{.}}
-; CHECK: frintx s{{.}}, s{{.}}
-; CHECK: frintx s{{.}}, s{{.}}
-; CHECK: frintx s{{.}}, s{{.}}
-
-; CHECK-FP16-LABEL: test_v4f16.rint:
-; CHECK-FP16-NOT: fcvt
-; CHECK-FP16: frintx.4h
-; CHECK-FP16-NEXT: ret
- %1 = call %v4f16 @llvm.rint.v4f16(%v4f16 %a)
- ret %v4f16 %1
-}
-define %v8f16 @test_v8f16.rint(%v8f16 %a) {
-; Filechecks are unwieldy with 16 fcvt and 8 frint tests, so skipped for -fullfp16.
-
-; CHECK-FP16-LABEL: test_v8f16.rint:
-; CHECK-FP16: frintx.8h
-; CHECK-FP16-NEXT: ret
- %1 = call %v8f16 @llvm.rint.v8f16(%v8f16 %a)
- ret %v8f16 %1
-}
; CHECK-LABEL: test_v2f32.nearbyint:
define %v2f32 @test_v2f32.nearbyint(%v2f32 %a) {
; CHECK: frinti.2s
%1 = call %v2f32 @llvm.nearbyint.v2f32(%v2f32 %a)
ret %v2f32 %1
}
-define %v4f16 @test_v4f16.nearbyint(%v4f16 %a) {
-; CHECK-LABEL: test_v4f16.nearbyint:
-; CHECK: frinti s{{.}}, s{{.}}
-; CHECK: frinti s{{.}}, s{{.}}
-; CHECK: frinti s{{.}}, s{{.}}
-; CHECK: frinti s{{.}}, s{{.}}
-
-; CHECK-FP16-LABEL: test_v4f16.nearbyint:
-; CHECK-FP16-NOT: fcvt
-; CHECK-FP16: frinti.4h
-; CHECK-FP16-NEXT: ret
- %1 = call %v4f16 @llvm.nearbyint.v4f16(%v4f16 %a)
- ret %v4f16 %1
-}
-define %v8f16 @test_v8f16.nearbyint(%v8f16 %a) {
-; Filechecks are unwieldy with 16 fcvt and 8 frint tests, so skipped for -fullfp16.
-
-; CHECK-FP16-LABEL: test_v8f16.nearbyint:
-; CHECK-FP16-NOT: fcvt
-; CHECK-FP16: frinti.8h
-; CHECK-FP16-NEXT: ret
- %1 = call %v8f16 @llvm.nearbyint.v8f16(%v8f16 %a)
- ret %v8f16 %1
-}
declare %v2f32 @llvm.sqrt.v2f32(%v2f32) #0
-declare %v4f16 @llvm.sqrt.v4f16(%v4f16) #0
-declare %v8f16 @llvm.sqrt.v8f16(%v8f16) #0
-
declare %v2f32 @llvm.powi.v2f32(%v2f32, i32) #0
declare %v2f32 @llvm.sin.v2f32(%v2f32) #0
declare %v2f32 @llvm.cos.v2f32(%v2f32) #0
@@ -311,38 +434,18 @@ declare %v2f32 @llvm.exp2.v2f32(%v2f32)
declare %v2f32 @llvm.log.v2f32(%v2f32) #0
declare %v2f32 @llvm.log10.v2f32(%v2f32) #0
declare %v2f32 @llvm.log2.v2f32(%v2f32) #0
-
declare %v2f32 @llvm.fma.v2f32(%v2f32, %v2f32, %v2f32) #0
-declare %v4f16 @llvm.fma.v4f16(%v4f16, %v4f16, %v4f16) #0
-declare %v8f16 @llvm.fma.v8f16(%v8f16, %v8f16, %v8f16) #0
-
declare %v2f32 @llvm.fabs.v2f32(%v2f32) #0
-declare %v4f16 @llvm.fabs.v4f16(%v4f16) #0
-declare %v8f16 @llvm.fabs.v8f16(%v8f16) #0
-
declare %v2f32 @llvm.floor.v2f32(%v2f32) #0
-declare %v4f16 @llvm.floor.v4f16(%v4f16) #0
-declare %v8f16 @llvm.floor.v8f16(%v8f16) #0
-
declare %v2f32 @llvm.ceil.v2f32(%v2f32) #0
-declare %v4f16 @llvm.ceil.v4f16(%v4f16) #0
-declare %v8f16 @llvm.ceil.v8f16(%v8f16) #0
-
declare %v2f32 @llvm.trunc.v2f32(%v2f32) #0
-declare %v4f16 @llvm.trunc.v4f16(%v4f16) #0
-declare %v8f16 @llvm.trunc.v8f16(%v8f16) #0
-
declare %v2f32 @llvm.rint.v2f32(%v2f32) #0
-declare %v4f16 @llvm.rint.v4f16(%v4f16) #0
-declare %v8f16 @llvm.rint.v8f16(%v8f16) #0
-
declare %v2f32 @llvm.nearbyint.v2f32(%v2f32) #0
-declare %v4f16 @llvm.nearbyint.v4f16(%v4f16) #0
-declare %v8f16 @llvm.nearbyint.v8f16(%v8f16) #0
;;;
%v4f32 = type <4 x float>
+
; CHECK: test_v4f32.sqrt:
define %v4f32 @test_v4f32.sqrt(%v4f32 %a) {
; CHECK: fsqrt.4s
More information about the llvm-commits mailing list.