[llvm] [AArch64][GlobalISel] Lower fp16 abs and neg without fullfp16. (PR #110096)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 26 02:37:37 PDT 2024
https://github.com/davemgreen created https://github.com/llvm/llvm-project/pull/110096
This changes the legalization of fp16 fabs and fneg without fullfp16 from promote to lower, so that they can be implemented with normal integer operations. A minor change was needed in the fneg lowering code to handle vectors.
From 0b5dfe44c1a074cf026225a99c3942220b607be0 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Thu, 26 Sep 2024 10:35:24 +0100
Subject: [PATCH] [AArch64][GlobalISel] Lower fp16 abs and neg without
fullfp16.
This changes the legalization of fp16 fabs and fneg without fullfp16 from
promote to lower, so that they can be implemented with normal integer
operations. A minor change was needed in the fneg lowering code to handle
vectors.
---
.../CodeGen/GlobalISel/LegalizerHelper.cpp | 8 +--
.../AArch64/GISel/AArch64LegalizerInfo.cpp | 4 +-
llvm/test/CodeGen/AArch64/f16-instructions.ll | 23 +++-----
llvm/test/CodeGen/AArch64/fabs.ll | 54 +++++--------------
llvm/test/CodeGen/AArch64/fneg.ll | 54 +++++--------------
5 files changed, 40 insertions(+), 103 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index c3b6b3033cf5c4..2fb2d104f1ce34 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -4051,12 +4051,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
auto [Res, SubByReg] = MI.getFirst2Regs();
LLT Ty = MRI.getType(Res);
- // TODO: Handle vector types once we are able to
- // represent them.
- if (Ty.isVector())
- return UnableToLegalize;
- auto SignMask =
- MIRBuilder.buildConstant(Ty, APInt::getSignMask(Ty.getSizeInBits()));
+ auto SignMask = MIRBuilder.buildConstant(
+ Ty, APInt::getSignMask(Ty.getScalarSizeInBits()));
MIRBuilder.buildXor(Res, SubByReg, SignMask);
MI.eraseFromParent();
return Legalized;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 6cb181011f8f67..51aeee023f2e34 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -268,11 +268,11 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
})
.scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
.lowerIf(scalarOrEltWiderThan(0, 64))
- .minScalarOrElt(0, MinFPScalar)
.clampNumElements(0, v4s16, v8s16)
.clampNumElements(0, v2s32, v4s32)
.clampNumElements(0, v2s64, v2s64)
- .moreElementsToNextPow2(0);
+ .moreElementsToNextPow2(0)
+ .lowerFor({s16, v4s16, v8s16});
getActionDefinitionsBuilder(G_FREM)
.libcallFor({s32, s64})
diff --git a/llvm/test/CodeGen/AArch64/f16-instructions.ll b/llvm/test/CodeGen/AArch64/f16-instructions.ll
index d8a17b40587106..e058c83f274f14 100644
--- a/llvm/test/CodeGen/AArch64/f16-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/f16-instructions.ll
@@ -1392,26 +1392,19 @@ define half @test_fma(half %a, half %b, half %c) #0 {
}
define half @test_fabs(half %a) #0 {
-; CHECK-CVT-SD-LABEL: test_fabs:
-; CHECK-CVT-SD: // %bb.0:
-; CHECK-CVT-SD-NEXT: // kill: def $h0 killed $h0 def $s0
-; CHECK-CVT-SD-NEXT: fmov w8, s0
-; CHECK-CVT-SD-NEXT: and w8, w8, #0x7fff
-; CHECK-CVT-SD-NEXT: fmov s0, w8
-; CHECK-CVT-SD-NEXT: // kill: def $h0 killed $h0 killed $s0
-; CHECK-CVT-SD-NEXT: ret
+; CHECK-CVT-LABEL: test_fabs:
+; CHECK-CVT: // %bb.0:
+; CHECK-CVT-NEXT: // kill: def $h0 killed $h0 def $s0
+; CHECK-CVT-NEXT: fmov w8, s0
+; CHECK-CVT-NEXT: and w8, w8, #0x7fff
+; CHECK-CVT-NEXT: fmov s0, w8
+; CHECK-CVT-NEXT: // kill: def $h0 killed $h0 killed $s0
+; CHECK-CVT-NEXT: ret
;
; CHECK-FP16-LABEL: test_fabs:
; CHECK-FP16: // %bb.0:
; CHECK-FP16-NEXT: fabs h0, h0
; CHECK-FP16-NEXT: ret
-;
-; CHECK-CVT-GI-LABEL: test_fabs:
-; CHECK-CVT-GI: // %bb.0:
-; CHECK-CVT-GI-NEXT: fcvt s0, h0
-; CHECK-CVT-GI-NEXT: fabs s0, s0
-; CHECK-CVT-GI-NEXT: fcvt h0, s0
-; CHECK-CVT-GI-NEXT: ret
%r = call half @llvm.fabs.f16(half %a)
ret half %r
}
diff --git a/llvm/test/CodeGen/AArch64/fabs.ll b/llvm/test/CodeGen/AArch64/fabs.ll
index e19e2ead11f4d0..43e90070736345 100644
--- a/llvm/test/CodeGen/AArch64/fabs.ll
+++ b/llvm/test/CodeGen/AArch64/fabs.ll
@@ -41,9 +41,11 @@ define half @fabs_f16(half %a) {
;
; CHECK-GI-NOFP16-LABEL: fabs_f16:
; CHECK-GI-NOFP16: // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
-; CHECK-GI-NOFP16-NEXT: fabs s0, s0
-; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT: // kill: def $h0 killed $h0 def $s0
+; CHECK-GI-NOFP16-NEXT: fmov w8, s0
+; CHECK-GI-NOFP16-NEXT: and w8, w8, #0x7fff
+; CHECK-GI-NOFP16-NEXT: fmov s0, w8
+; CHECK-GI-NOFP16-NEXT: // kill: def $h0 killed $h0 killed $s0
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: fabs_f16:
@@ -160,22 +162,8 @@ define <7 x half> @fabs_v7f16(<7 x half> %a) {
;
; CHECK-GI-NOFP16-LABEL: fabs_v7f16:
; CHECK-GI-NOFP16: // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v0.4h
-; CHECK-GI-NOFP16-NEXT: mov v2.h[0], v0.h[4]
-; CHECK-GI-NOFP16-NEXT: fabs v1.4s, v1.4s
-; CHECK-GI-NOFP16-NEXT: mov v2.h[1], v0.h[5]
-; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s
-; CHECK-GI-NOFP16-NEXT: mov v2.h[2], v0.h[6]
-; CHECK-GI-NOFP16-NEXT: mov v0.h[0], v1.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v2.4h
-; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[1]
-; CHECK-GI-NOFP16-NEXT: fabs v2.4s, v2.4s
-; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v1.h[2]
-; CHECK-GI-NOFP16-NEXT: fcvtn v2.4h, v2.4s
-; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v1.h[3]
-; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v2.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v0.h[5], v2.h[1]
-; CHECK-GI-NOFP16-NEXT: mov v0.h[6], v2.h[2]
+; CHECK-GI-NOFP16-NEXT: mvni v1.8h, #128, lsl #8
+; CHECK-GI-NOFP16-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: fabs_v7f16:
@@ -200,9 +188,8 @@ define <4 x half> @fabs_v4f16(<4 x half> %a) {
;
; CHECK-GI-NOFP16-LABEL: fabs_v4f16:
; CHECK-GI-NOFP16: // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-GI-NOFP16-NEXT: fabs v0.4s, v0.4s
-; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-GI-NOFP16-NEXT: mvni v1.4h, #128, lsl #8
+; CHECK-GI-NOFP16-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: fabs_v4f16:
@@ -227,12 +214,8 @@ define <8 x half> @fabs_v8f16(<8 x half> %a) {
;
; CHECK-GI-NOFP16-LABEL: fabs_v8f16:
; CHECK-GI-NOFP16: // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v0.4h
-; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
-; CHECK-GI-NOFP16-NEXT: fabs v1.4s, v1.4s
-; CHECK-GI-NOFP16-NEXT: fabs v2.4s, v0.4s
-; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v1.4s
-; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v2.4s
+; CHECK-GI-NOFP16-NEXT: mvni v1.8h, #128, lsl #8
+; CHECK-GI-NOFP16-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: fabs_v8f16:
@@ -259,18 +242,9 @@ define <16 x half> @fabs_v16f16(<16 x half> %a) {
;
; CHECK-GI-NOFP16-LABEL: fabs_v16f16:
; CHECK-GI-NOFP16: // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v0.4h
-; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v1.4h
-; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
-; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
-; CHECK-GI-NOFP16-NEXT: fabs v2.4s, v2.4s
-; CHECK-GI-NOFP16-NEXT: fabs v3.4s, v3.4s
-; CHECK-GI-NOFP16-NEXT: fabs v4.4s, v0.4s
-; CHECK-GI-NOFP16-NEXT: fabs v5.4s, v1.4s
-; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v2.4s
-; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v3.4s
-; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v4.4s
-; CHECK-GI-NOFP16-NEXT: fcvtn2 v1.8h, v5.4s
+; CHECK-GI-NOFP16-NEXT: mvni v2.8h, #128, lsl #8
+; CHECK-GI-NOFP16-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NOFP16-NEXT: and v1.16b, v1.16b, v2.16b
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: fabs_v16f16:
diff --git a/llvm/test/CodeGen/AArch64/fneg.ll b/llvm/test/CodeGen/AArch64/fneg.ll
index a0e9edff733e09..de2671afe60ab7 100644
--- a/llvm/test/CodeGen/AArch64/fneg.ll
+++ b/llvm/test/CodeGen/AArch64/fneg.ll
@@ -41,9 +41,11 @@ define half @fabs_f16(half %a) {
;
; CHECK-GI-NOFP16-LABEL: fabs_f16:
; CHECK-GI-NOFP16: // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
-; CHECK-GI-NOFP16-NEXT: fneg s0, s0
-; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT: // kill: def $h0 killed $h0 def $s0
+; CHECK-GI-NOFP16-NEXT: fmov w8, s0
+; CHECK-GI-NOFP16-NEXT: eor w8, w8, #0xffff8000
+; CHECK-GI-NOFP16-NEXT: fmov s0, w8
+; CHECK-GI-NOFP16-NEXT: // kill: def $h0 killed $h0 killed $s0
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: fabs_f16:
@@ -161,22 +163,8 @@ define <7 x half> @fabs_v7f16(<7 x half> %a) {
;
; CHECK-GI-NOFP16-LABEL: fabs_v7f16:
; CHECK-GI-NOFP16: // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v0.4h
-; CHECK-GI-NOFP16-NEXT: mov v2.h[0], v0.h[4]
-; CHECK-GI-NOFP16-NEXT: fneg v1.4s, v1.4s
-; CHECK-GI-NOFP16-NEXT: mov v2.h[1], v0.h[5]
-; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s
-; CHECK-GI-NOFP16-NEXT: mov v2.h[2], v0.h[6]
-; CHECK-GI-NOFP16-NEXT: mov v0.h[0], v1.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v2.4h
-; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[1]
-; CHECK-GI-NOFP16-NEXT: fneg v2.4s, v2.4s
-; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v1.h[2]
-; CHECK-GI-NOFP16-NEXT: fcvtn v2.4h, v2.4s
-; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v1.h[3]
-; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v2.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v0.h[5], v2.h[1]
-; CHECK-GI-NOFP16-NEXT: mov v0.h[6], v2.h[2]
+; CHECK-GI-NOFP16-NEXT: movi v1.8h, #128, lsl #8
+; CHECK-GI-NOFP16-NEXT: eor v0.16b, v0.16b, v1.16b
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: fabs_v7f16:
@@ -202,9 +190,8 @@ define <4 x half> @fabs_v4f16(<4 x half> %a) {
;
; CHECK-GI-NOFP16-LABEL: fabs_v4f16:
; CHECK-GI-NOFP16: // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-GI-NOFP16-NEXT: fneg v0.4s, v0.4s
-; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-GI-NOFP16-NEXT: movi v1.4h, #128, lsl #8
+; CHECK-GI-NOFP16-NEXT: eor v0.8b, v0.8b, v1.8b
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: fabs_v4f16:
@@ -230,12 +217,8 @@ define <8 x half> @fabs_v8f16(<8 x half> %a) {
;
; CHECK-GI-NOFP16-LABEL: fabs_v8f16:
; CHECK-GI-NOFP16: // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v0.4h
-; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
-; CHECK-GI-NOFP16-NEXT: fneg v1.4s, v1.4s
-; CHECK-GI-NOFP16-NEXT: fneg v2.4s, v0.4s
-; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v1.4s
-; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v2.4s
+; CHECK-GI-NOFP16-NEXT: movi v1.8h, #128, lsl #8
+; CHECK-GI-NOFP16-NEXT: eor v0.16b, v0.16b, v1.16b
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: fabs_v8f16:
@@ -263,18 +246,9 @@ define <16 x half> @fabs_v16f16(<16 x half> %a) {
;
; CHECK-GI-NOFP16-LABEL: fabs_v16f16:
; CHECK-GI-NOFP16: // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v0.4h
-; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v1.4h
-; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
-; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
-; CHECK-GI-NOFP16-NEXT: fneg v2.4s, v2.4s
-; CHECK-GI-NOFP16-NEXT: fneg v3.4s, v3.4s
-; CHECK-GI-NOFP16-NEXT: fneg v4.4s, v0.4s
-; CHECK-GI-NOFP16-NEXT: fneg v5.4s, v1.4s
-; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v2.4s
-; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v3.4s
-; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v4.4s
-; CHECK-GI-NOFP16-NEXT: fcvtn2 v1.8h, v5.4s
+; CHECK-GI-NOFP16-NEXT: movi v2.8h, #128, lsl #8
+; CHECK-GI-NOFP16-NEXT: eor v0.16b, v0.16b, v2.16b
+; CHECK-GI-NOFP16-NEXT: eor v1.16b, v1.16b, v2.16b
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: fabs_v16f16:
More information about the llvm-commits
mailing list