[llvm] [AArch64][GlobalISel] Legalize 128-bit types for FABS (PR #104753)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 3 02:17:19 PDT 2024
https://github.com/Him188 updated https://github.com/llvm/llvm-project/pull/104753
>From 2e386b20c9b55ea51560c2bd1eba0e0478e2fbc0 Mon Sep 17 00:00:00 2001
From: Tianyi Guan <tguan at nvidia.com>
Date: Tue, 30 Jul 2024 12:19:52 +0100
Subject: [PATCH 1/8] [AArch64][GlobalISel] Add test cases for 128-bit fabs (NFC)
---
llvm/test/CodeGen/AArch64/fabs-fp128.ll | 58 +++++++++++++++++++++++++
1 file changed, 58 insertions(+)
create mode 100644 llvm/test/CodeGen/AArch64/fabs-fp128.ll
diff --git a/llvm/test/CodeGen/AArch64/fabs-fp128.ll b/llvm/test/CodeGen/AArch64/fabs-fp128.ll
new file mode 100644
index 00000000000000..0232397ffc7c67
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/fabs-fp128.ll
@@ -0,0 +1,58 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64 -global-isel=1 -global-isel-abort=2 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+
+; CHECK: warning: Instruction selection used fallback path for fabs_f128
+; CHECK-NEXT: warning: Instruction selection used fallback path for fabs_v1f128
+; CHECK-NEXT: warning: Instruction selection used fallback path for fabs_v2f128
+
+define fp128 @fabs_f128(fp128 %a) {
+; CHECK-LABEL: fabs_f128:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str q0, [sp, #-16]!
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: ldrb w8, [sp, #15]
+; CHECK-NEXT: and w8, w8, #0x7f
+; CHECK-NEXT: strb w8, [sp, #15]
+; CHECK-NEXT: ldr q0, [sp], #16
+; CHECK-NEXT: ret
+entry:
+ %c = call fp128 @llvm.fabs.f128(fp128 %a)
+ ret fp128 %c
+}
+
+define <1 x fp128> @fabs_v1f128(<1 x fp128> %a) {
+; CHECK-LABEL: fabs_v1f128:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str q0, [sp, #-16]!
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: ldrb w8, [sp, #15]
+; CHECK-NEXT: and w8, w8, #0x7f
+; CHECK-NEXT: strb w8, [sp, #15]
+; CHECK-NEXT: ldr q0, [sp], #16
+; CHECK-NEXT: ret
+entry:
+ %c = call <1 x fp128> @llvm.fabs.v1f128(<1 x fp128> %a)
+ ret <1 x fp128> %c
+}
+
+define <2 x fp128> @fabs_v2f128(<2 x fp128> %a) {
+; CHECK-LABEL: fabs_v2f128:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: stp q0, q1, [sp, #-32]!
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: ldrb w8, [sp, #15]
+; CHECK-NEXT: and w8, w8, #0x7f
+; CHECK-NEXT: strb w8, [sp, #15]
+; CHECK-NEXT: ldrb w8, [sp, #31]
+; CHECK-NEXT: and w8, w8, #0x7f
+; CHECK-NEXT: strb w8, [sp, #31]
+; CHECK-NEXT: ldp q0, q1, [sp], #32
+; CHECK-NEXT: ret
+entry:
+ %c = call <2 x fp128> @llvm.fabs.v2f128(<2 x fp128> %a)
+ ret <2 x fp128> %c
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-GI: {{.*}}
+; CHECK-SD: {{.*}}
>From c4a8d3845756019dd3f9b8dc00ad32ba61768702 Mon Sep 17 00:00:00 2001
From: Tianyi Guan <tguan at nvidia.com>
Date: Tue, 30 Jul 2024 12:19:52 +0100
Subject: [PATCH 2/8] [AArch64][GlobalISel] Legalize 128-bit types for FABS
- Generate AND to clear sign bit for s128
- Vectors are scalarized
---
.../AArch64/GISel/AArch64LegalizerInfo.cpp | 41 ++++++-
.../AArch64/GISel/AArch64LegalizerInfo.h | 2 +
llvm/test/CodeGen/AArch64/fabs-fp128.ll | 100 +++++++++++-------
3 files changed, 102 insertions(+), 41 deletions(-)
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 7eaf6a84bd204f..9a7b9e143917f6 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -242,9 +242,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.widenScalarToNextPow2(0);
getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FNEG,
- G_FABS, G_FSQRT, G_FMAXNUM, G_FMINNUM,
- G_FMAXIMUM, G_FMINIMUM, G_FCEIL, G_FFLOOR,
- G_FRINT, G_FNEARBYINT, G_INTRINSIC_TRUNC,
+ G_FSQRT, G_FMAXNUM, G_FMINNUM, G_FMAXIMUM,
+ G_FMINIMUM, G_FCEIL, G_FFLOOR, G_FRINT,
+ G_FNEARBYINT, G_INTRINSIC_TRUNC,
G_INTRINSIC_ROUND, G_INTRINSIC_ROUNDEVEN})
.legalFor({MinFPScalar, s32, s64, v2s32, v4s32, v2s64})
.legalIf([=](const LegalityQuery &Query) {
@@ -258,6 +258,20 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.clampNumElements(0, v2s64, v2s64)
.moreElementsToNextPow2(0);
+ getActionDefinitionsBuilder(G_FABS)
+ .legalFor({MinFPScalar, s32, s64, v2s32, v4s32, v2s64})
+ .legalIf([=](const LegalityQuery &Query) {
+ const auto &Ty = Query.Types[0];
+ return (Ty == v8s16 || Ty == v4s16) && HasFP16;
+ })
+ .customFor({s128})
+ .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
+ .minScalarOrElt(0, MinFPScalar)
+ .clampNumElements(0, v4s16, v8s16)
+ .clampNumElements(0, v2s32, v4s32)
+ .clampNumElements(0, v2s64, v2s64)
+ .moreElementsToNextPow2(0);
+
getActionDefinitionsBuilder(G_FREM)
.libcallFor({s32, s64})
.minScalar(0, s32)
@@ -1346,6 +1360,8 @@ bool AArch64LegalizerInfo::legalizeCustom(
return legalizePrefetch(MI, Helper);
case TargetOpcode::G_ABS:
return Helper.lowerAbsToCNeg(MI);
+ case TargetOpcode::G_FABS:
+ return legalizeFABS(MI, MRI, MIRBuilder);
case TargetOpcode::G_ICMP:
return legalizeICMP(MI, MRI, MIRBuilder);
}
@@ -1406,6 +1422,25 @@ bool AArch64LegalizerInfo::legalizeFunnelShift(MachineInstr &MI,
return true;
}
+bool AArch64LegalizerInfo::legalizeFABS(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder) const {
+ Register SrcReg = MI.getOperand(1).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
+
+ constexpr LLT S128 = LLT::scalar(128);
+ if (MRI.getType(SrcReg) != S128 || MRI.getType(DstReg) != S128)
+ return false;
+
+ MIRBuilder.buildAnd(
+ DstReg, SrcReg,
+ MIRBuilder.buildConstant(
+ S128, APInt::getSignedMaxValue(128)));
+
+ MI.eraseFromParent();
+ return true;
+}
+
bool AArch64LegalizerInfo::legalizeICMP(MachineInstr &MI,
MachineRegisterInfo &MRI,
MachineIRBuilder &MIRBuilder) const {
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
index 00d85a36e4b2ca..8bf642d1745aa9 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
@@ -50,6 +50,8 @@ class AArch64LegalizerInfo : public LegalizerInfo {
LegalizerHelper &Helper) const;
bool legalizeRotate(MachineInstr &MI, MachineRegisterInfo &MRI,
LegalizerHelper &Helper) const;
+ bool legalizeFABS(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder) const;
bool legalizeICMP(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &MIRBuilder) const;
bool legalizeFunnelShift(MachineInstr &MI, MachineRegisterInfo &MRI,
diff --git a/llvm/test/CodeGen/AArch64/fabs-fp128.ll b/llvm/test/CodeGen/AArch64/fabs-fp128.ll
index 0232397ffc7c67..131af5e0a3281e 100644
--- a/llvm/test/CodeGen/AArch64/fabs-fp128.ll
+++ b/llvm/test/CodeGen/AArch64/fabs-fp128.ll
@@ -1,58 +1,82 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
-; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
-; RUN: llc -mtriple=aarch64 -global-isel=1 -global-isel-abort=2 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
-
-; CHECK: warning: Instruction selection used fallback path for fabs_f128
-; CHECK-NEXT: warning: Instruction selection used fallback path for fabs_v1f128
-; CHECK-NEXT: warning: Instruction selection used fallback path for fabs_v2f128
+; RUN: llc -mtriple=aarch64-unknown-linux-gnu -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64-unknown-linux-gnu -global-isel=1 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
define fp128 @fabs_f128(fp128 %a) {
-; CHECK-LABEL: fabs_f128:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: str q0, [sp, #-16]!
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: ldrb w8, [sp, #15]
-; CHECK-NEXT: and w8, w8, #0x7f
-; CHECK-NEXT: strb w8, [sp, #15]
-; CHECK-NEXT: ldr q0, [sp], #16
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: fabs_f128:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: str q0, [sp, #-16]!
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT: ldrb w8, [sp, #15]
+; CHECK-SD-NEXT: and w8, w8, #0x7f
+; CHECK-SD-NEXT: strb w8, [sp, #15]
+; CHECK-SD-NEXT: ldr q0, [sp], #16
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fabs_f128:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov x8, v0.d[1]
+; CHECK-GI-NEXT: mov v0.d[0], v0.d[0]
+; CHECK-GI-NEXT: and x8, x8, #0x7fffffffffffffff
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: ret
entry:
%c = call fp128 @llvm.fabs.f128(fp128 %a)
ret fp128 %c
}
define <1 x fp128> @fabs_v1f128(<1 x fp128> %a) {
-; CHECK-LABEL: fabs_v1f128:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: str q0, [sp, #-16]!
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: ldrb w8, [sp, #15]
-; CHECK-NEXT: and w8, w8, #0x7f
-; CHECK-NEXT: strb w8, [sp, #15]
-; CHECK-NEXT: ldr q0, [sp], #16
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: fabs_v1f128:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: str q0, [sp, #-16]!
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT: ldrb w8, [sp, #15]
+; CHECK-SD-NEXT: and w8, w8, #0x7f
+; CHECK-SD-NEXT: strb w8, [sp, #15]
+; CHECK-SD-NEXT: ldr q0, [sp], #16
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fabs_v1f128:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov x8, v0.d[1]
+; CHECK-GI-NEXT: mov v0.d[0], v0.d[0]
+; CHECK-GI-NEXT: and x8, x8, #0x7fffffffffffffff
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: ret
entry:
%c = call <1 x fp128> @llvm.fabs.v1f128(<1 x fp128> %a)
ret <1 x fp128> %c
}
define <2 x fp128> @fabs_v2f128(<2 x fp128> %a) {
-; CHECK-LABEL: fabs_v2f128:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: stp q0, q1, [sp, #-32]!
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: ldrb w8, [sp, #15]
-; CHECK-NEXT: and w8, w8, #0x7f
-; CHECK-NEXT: strb w8, [sp, #15]
-; CHECK-NEXT: ldrb w8, [sp, #31]
-; CHECK-NEXT: and w8, w8, #0x7f
-; CHECK-NEXT: strb w8, [sp, #31]
-; CHECK-NEXT: ldp q0, q1, [sp], #32
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: fabs_v2f128:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: stp q0, q1, [sp, #-32]!
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT: ldrb w8, [sp, #15]
+; CHECK-SD-NEXT: and w8, w8, #0x7f
+; CHECK-SD-NEXT: strb w8, [sp, #15]
+; CHECK-SD-NEXT: ldrb w8, [sp, #31]
+; CHECK-SD-NEXT: and w8, w8, #0x7f
+; CHECK-SD-NEXT: strb w8, [sp, #31]
+; CHECK-SD-NEXT: ldp q0, q1, [sp], #32
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fabs_v2f128:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov x8, v0.d[1]
+; CHECK-GI-NEXT: mov x9, v1.d[1]
+; CHECK-GI-NEXT: mov v0.d[0], v0.d[0]
+; CHECK-GI-NEXT: mov v1.d[0], v1.d[0]
+; CHECK-GI-NEXT: and x8, x8, #0x7fffffffffffffff
+; CHECK-GI-NEXT: and x9, x9, #0x7fffffffffffffff
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: mov v1.d[1], x9
+; CHECK-GI-NEXT: ret
entry:
%c = call <2 x fp128> @llvm.fabs.v2f128(<2 x fp128> %a)
ret <2 x fp128> %c
}
+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CHECK-GI: {{.*}}
-; CHECK-SD: {{.*}}
+; CHECK: {{.*}}
>From eaa567d56bc2bbf6c42c8cbcbc5869059daabab2 Mon Sep 17 00:00:00 2001
From: Tianyi Guan <tguan at nvidia.com>
Date: Mon, 19 Aug 2024 10:58:02 +0100
Subject: [PATCH 3/8] Generalize lowerFAbs in LegalizerHelper
---
.../llvm/CodeGen/GlobalISel/LegalizerHelper.h | 1 +
.../CodeGen/GlobalISel/LegalizerHelper.cpp | 22 ++++++++++++++++++
.../AArch64/GISel/AArch64LegalizerInfo.cpp | 23 +------------------
.../AArch64/GISel/AArch64LegalizerInfo.h | 2 --
4 files changed, 24 insertions(+), 24 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
index 0b2cd299bde12a..afd68250f5ca6e 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -437,6 +437,7 @@ class LegalizerHelper {
LegalizeResult lowerAbsToAddXor(MachineInstr &MI);
LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI);
LegalizeResult lowerAbsToCNeg(MachineInstr &MI);
+ LegalizeResult lowerFAbs(MachineInstr &MI);
LegalizeResult lowerVectorReduction(MachineInstr &MI);
LegalizeResult lowerMemcpyInline(MachineInstr &MI);
LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen = 0);
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 3fece81df1f2fd..8c142f084fb341 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -4255,6 +4255,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
return lowerShlSat(MI);
case G_ABS:
return lowerAbsToAddXor(MI);
+ case G_FABS:
+ return lowerFAbs(MI);
case G_SELECT:
return lowerSelect(MI);
case G_IS_FPCLASS:
@@ -8761,6 +8763,26 @@ LegalizerHelper::lowerAbsToCNeg(MachineInstr &MI) {
return Legalized;
}
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerFAbs(MachineInstr &MI) {
+ Register SrcReg = MI.getOperand(1).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
+
+ LLT Ty = MRI.getType(DstReg);
+ if (MRI.getType(SrcReg) != Ty)
+ return UnableToLegalize;
+
+ if (!Ty.isScalar())
+ return UnableToLegalize;
+
+ // Reset sign bit
+ MIRBuilder.buildAnd(DstReg, SrcReg,
+ MIRBuilder.buildConstant(
+ Ty, APInt::getSignedMaxValue(Ty.getSizeInBits())));
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerVectorReduction(MachineInstr &MI) {
Register SrcReg = MI.getOperand(1).getReg();
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 9a7b9e143917f6..64a035b356396f 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -264,7 +264,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
const auto &Ty = Query.Types[0];
return (Ty == v8s16 || Ty == v4s16) && HasFP16;
})
- .customFor({s128})
+ .lowerFor({s128})
.scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
.minScalarOrElt(0, MinFPScalar)
.clampNumElements(0, v4s16, v8s16)
@@ -1360,8 +1360,6 @@ bool AArch64LegalizerInfo::legalizeCustom(
return legalizePrefetch(MI, Helper);
case TargetOpcode::G_ABS:
return Helper.lowerAbsToCNeg(MI);
- case TargetOpcode::G_FABS:
- return legalizeFABS(MI, MRI, MIRBuilder);
case TargetOpcode::G_ICMP:
return legalizeICMP(MI, MRI, MIRBuilder);
}
@@ -1422,25 +1420,6 @@ bool AArch64LegalizerInfo::legalizeFunnelShift(MachineInstr &MI,
return true;
}
-bool AArch64LegalizerInfo::legalizeFABS(MachineInstr &MI,
- MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder) const {
- Register SrcReg = MI.getOperand(1).getReg();
- Register DstReg = MI.getOperand(0).getReg();
-
- constexpr LLT S128 = LLT::scalar(128);
- if (MRI.getType(SrcReg) != S128 || MRI.getType(DstReg) != S128)
- return false;
-
- MIRBuilder.buildAnd(
- DstReg, SrcReg,
- MIRBuilder.buildConstant(
- S128, APInt::getSignedMaxValue(128)));
-
- MI.eraseFromParent();
- return true;
-}
-
bool AArch64LegalizerInfo::legalizeICMP(MachineInstr &MI,
MachineRegisterInfo &MRI,
MachineIRBuilder &MIRBuilder) const {
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
index 8bf642d1745aa9..00d85a36e4b2ca 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
@@ -50,8 +50,6 @@ class AArch64LegalizerInfo : public LegalizerInfo {
LegalizerHelper &Helper) const;
bool legalizeRotate(MachineInstr &MI, MachineRegisterInfo &MRI,
LegalizerHelper &Helper) const;
- bool legalizeFABS(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder) const;
bool legalizeICMP(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &MIRBuilder) const;
bool legalizeFunnelShift(MachineInstr &MI, MachineRegisterInfo &MRI,
>From b8e3193a853071bdefa87d0a4aeaab3ea0979a0b Mon Sep 17 00:00:00 2001
From: Tianyi Guan <tguan at nvidia.com>
Date: Tue, 20 Aug 2024 09:27:17 +0100
Subject: [PATCH 4/8] Remove unnecessary checks
---
llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 5 -----
1 file changed, 5 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 8c142f084fb341..79b8cc9038a096 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -8768,11 +8768,6 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerFAbs(MachineInstr &MI) {
Register DstReg = MI.getOperand(0).getReg();
LLT Ty = MRI.getType(DstReg);
- if (MRI.getType(SrcReg) != Ty)
- return UnableToLegalize;
-
- if (!Ty.isScalar())
- return UnableToLegalize;
// Reset sign bit
MIRBuilder.buildAnd(DstReg, SrcReg,
>From b85fd556bbd7ecc05f184c7b4988a926f779d12d Mon Sep 17 00:00:00 2001
From: Tianyi Guan <tguan at nvidia.com>
Date: Fri, 23 Aug 2024 10:36:21 +0100
Subject: [PATCH 5/8] Apply suggestions: support vectors in lowerFAbs;
simplify legality predicates
---
.../CodeGen/GlobalISel/LegalizerHelper.cpp | 7 +-
.../AArch64/GISel/AArch64LegalizerInfo.cpp | 17 ++--
llvm/test/CodeGen/AArch64/fabs-fp128.ll | 88 +++++++++++++++++++
3 files changed, 104 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 79b8cc9038a096..776517e9aa8837 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -8770,9 +8770,10 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerFAbs(MachineInstr &MI) {
LLT Ty = MRI.getType(DstReg);
// Reset sign bit
- MIRBuilder.buildAnd(DstReg, SrcReg,
- MIRBuilder.buildConstant(
- Ty, APInt::getSignedMaxValue(Ty.getSizeInBits())));
+ MIRBuilder.buildAnd(
+ DstReg, SrcReg,
+ MIRBuilder.buildConstant(
+ Ty, APInt::getSignedMaxValue(Ty.getScalarSizeInBits())));
MI.eraseFromParent();
return Legalized;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 64a035b356396f..52b39a6bf07c70 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -89,6 +89,13 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
const bool HasFP16 = ST.hasFullFP16();
const LLT &MinFPScalar = HasFP16 ? s16 : s32;
+ // A legality predicate that returns true if the subtarget has FP16 support.
+ // To be used in combination with other predicates, e.g:
+ // .legalIf(all(hasFP16(), typeInSet(0, {v8s16, v4s16})))
+ const auto hasFP16 = [=]() -> LegalityPredicate {
+ return [=](const LegalityQuery &) { return HasFP16; };
+ };
+
const bool HasCSSC = ST.hasCSSC();
const bool HasRCPC3 = ST.hasRCPC3();
@@ -260,12 +267,12 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder(G_FABS)
.legalFor({MinFPScalar, s32, s64, v2s32, v4s32, v2s64})
- .legalIf([=](const LegalityQuery &Query) {
- const auto &Ty = Query.Types[0];
- return (Ty == v8s16 || Ty == v4s16) && HasFP16;
- })
- .lowerFor({s128})
+ .legalIf(all(hasFP16(), typeInSet(0, {v8s16, v4s16})))
+ // TODO: Lower supports 128-bit types but G_AND generated by Lower does
+ // not yet.
+ // When it does, we can remove scalarizeIf.
.scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
+ .lowerIf(scalarOrEltWiderThan(0, 64))
.minScalarOrElt(0, MinFPScalar)
.clampNumElements(0, v4s16, v8s16)
.clampNumElements(0, v2s32, v4s32)
diff --git a/llvm/test/CodeGen/AArch64/fabs-fp128.ll b/llvm/test/CodeGen/AArch64/fabs-fp128.ll
index 131af5e0a3281e..36d64d72b207a2 100644
--- a/llvm/test/CodeGen/AArch64/fabs-fp128.ll
+++ b/llvm/test/CodeGen/AArch64/fabs-fp128.ll
@@ -78,5 +78,93 @@ entry:
ret <2 x fp128> %c
}
+define <3 x fp128> @fabs_v3f128(<3 x fp128> %a) {
+; CHECK-SD-LABEL: fabs_v3f128:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: stp q0, q1, [sp, #-48]!
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 48
+; CHECK-SD-NEXT: ldrb w8, [sp, #15]
+; CHECK-SD-NEXT: str q2, [sp, #32]
+; CHECK-SD-NEXT: and w8, w8, #0x7f
+; CHECK-SD-NEXT: strb w8, [sp, #15]
+; CHECK-SD-NEXT: ldrb w8, [sp, #31]
+; CHECK-SD-NEXT: and w8, w8, #0x7f
+; CHECK-SD-NEXT: strb w8, [sp, #31]
+; CHECK-SD-NEXT: ldrb w8, [sp, #47]
+; CHECK-SD-NEXT: ldp q0, q1, [sp]
+; CHECK-SD-NEXT: and w8, w8, #0x7f
+; CHECK-SD-NEXT: strb w8, [sp, #47]
+; CHECK-SD-NEXT: ldr q2, [sp, #32]
+; CHECK-SD-NEXT: add sp, sp, #48
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fabs_v3f128:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov x8, v0.d[1]
+; CHECK-GI-NEXT: mov x9, v1.d[1]
+; CHECK-GI-NEXT: mov x10, v2.d[1]
+; CHECK-GI-NEXT: mov v0.d[0], v0.d[0]
+; CHECK-GI-NEXT: mov v1.d[0], v1.d[0]
+; CHECK-GI-NEXT: mov v2.d[0], v2.d[0]
+; CHECK-GI-NEXT: and x8, x8, #0x7fffffffffffffff
+; CHECK-GI-NEXT: and x9, x9, #0x7fffffffffffffff
+; CHECK-GI-NEXT: and x10, x10, #0x7fffffffffffffff
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: mov v1.d[1], x9
+; CHECK-GI-NEXT: mov v2.d[1], x10
+; CHECK-GI-NEXT: ret
+entry:
+ %c = call <3 x fp128> @llvm.fabs.v3f128(<3 x fp128> %a)
+ ret <3 x fp128> %c
+}
+
+define <4 x fp128> @fabs_v4f128(<4 x fp128> %a) {
+; CHECK-SD-LABEL: fabs_v4f128:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: stp q0, q1, [sp, #-64]!
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 64
+; CHECK-SD-NEXT: ldrb w8, [sp, #15]
+; CHECK-SD-NEXT: stp q2, q3, [sp, #32]
+; CHECK-SD-NEXT: and w8, w8, #0x7f
+; CHECK-SD-NEXT: strb w8, [sp, #15]
+; CHECK-SD-NEXT: ldrb w8, [sp, #31]
+; CHECK-SD-NEXT: and w8, w8, #0x7f
+; CHECK-SD-NEXT: strb w8, [sp, #31]
+; CHECK-SD-NEXT: ldrb w8, [sp, #47]
+; CHECK-SD-NEXT: ldp q0, q1, [sp]
+; CHECK-SD-NEXT: and w8, w8, #0x7f
+; CHECK-SD-NEXT: strb w8, [sp, #47]
+; CHECK-SD-NEXT: ldrb w8, [sp, #63]
+; CHECK-SD-NEXT: and w8, w8, #0x7f
+; CHECK-SD-NEXT: strb w8, [sp, #63]
+; CHECK-SD-NEXT: ldp q2, q3, [sp, #32]
+; CHECK-SD-NEXT: add sp, sp, #64
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fabs_v4f128:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov x8, v0.d[1]
+; CHECK-GI-NEXT: mov v7.d[0], v0.d[0]
+; CHECK-GI-NEXT: mov x9, v1.d[1]
+; CHECK-GI-NEXT: mov x10, v2.d[1]
+; CHECK-GI-NEXT: mov x11, v3.d[1]
+; CHECK-GI-NEXT: mov v1.d[0], v1.d[0]
+; CHECK-GI-NEXT: mov v2.d[0], v2.d[0]
+; CHECK-GI-NEXT: mov v3.d[0], v3.d[0]
+; CHECK-GI-NEXT: and x8, x8, #0x7fffffffffffffff
+; CHECK-GI-NEXT: mov v7.d[1], x8
+; CHECK-GI-NEXT: and x8, x9, #0x7fffffffffffffff
+; CHECK-GI-NEXT: and x9, x10, #0x7fffffffffffffff
+; CHECK-GI-NEXT: and x10, x11, #0x7fffffffffffffff
+; CHECK-GI-NEXT: mov v1.d[1], x8
+; CHECK-GI-NEXT: mov v2.d[1], x9
+; CHECK-GI-NEXT: mov v3.d[1], x10
+; CHECK-GI-NEXT: mov v0.16b, v7.16b
+; CHECK-GI-NEXT: ret
+entry:
+ %c = call <4 x fp128> @llvm.fabs.v4f128(<4 x fp128> %a)
+ ret <4 x fp128> %c
+}
+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK: {{.*}}
>From c82d6507a492c5510f38c7c5fdae7f57d0aa6845 Mon Sep 17 00:00:00 2001
From: Tianyi Guan <tguan at nvidia.com>
Date: Thu, 29 Aug 2024 11:31:16 +0100
Subject: [PATCH 6/8] Rollback hasFP16
---
.../Target/AArch64/GISel/AArch64LegalizerInfo.cpp | 15 ++++-----------
1 file changed, 4 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 52b39a6bf07c70..db5cd1d32d73d0 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -89,13 +89,6 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
const bool HasFP16 = ST.hasFullFP16();
const LLT &MinFPScalar = HasFP16 ? s16 : s32;
- // A legality predicate that returns true if the subtarget has FP16 support.
- // To be used in combination with other predicates, e.g:
- // .legalIf(all(hasFP16(), typeInSet(0, {v8s16, v4s16})))
- const auto hasFP16 = [=]() -> LegalityPredicate {
- return [=](const LegalityQuery &) { return HasFP16; };
- };
-
const bool HasCSSC = ST.hasCSSC();
const bool HasRCPC3 = ST.hasRCPC3();
@@ -267,10 +260,10 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder(G_FABS)
.legalFor({MinFPScalar, s32, s64, v2s32, v4s32, v2s64})
- .legalIf(all(hasFP16(), typeInSet(0, {v8s16, v4s16})))
- // TODO: Lower supports 128-bit types but G_AND generated by Lower does
- // not yet.
- // When it does, we can remove scalarizeIf.
+ .legalIf([=](const LegalityQuery &Query) {
+ const auto &Ty = Query.Types[0];
+ return (Ty == v8s16 || Ty == v4s16) && HasFP16;
+ })
.scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
.lowerIf(scalarOrEltWiderThan(0, 64))
.minScalarOrElt(0, MinFPScalar)
>From dd106e18aaf256db1cbba635b2fb5d8f0a9ba3c1 Mon Sep 17 00:00:00 2001
From: Tianyi Guan <tguan at nvidia.com>
Date: Thu, 29 Aug 2024 11:33:17 +0100
Subject: [PATCH 7/8] Clarify test run lines
---
llvm/test/CodeGen/AArch64/fabs-fp128.ll | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/fabs-fp128.ll b/llvm/test/CodeGen/AArch64/fabs-fp128.ll
index 36d64d72b207a2..903aa8adf70851 100644
--- a/llvm/test/CodeGen/AArch64/fabs-fp128.ll
+++ b/llvm/test/CodeGen/AArch64/fabs-fp128.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
-; RUN: llc -mtriple=aarch64-unknown-linux-gnu -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
-; RUN: llc -mtriple=aarch64-unknown-linux-gnu -global-isel=1 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+; RUN: llc -mtriple=aarch64-unknown-linux-gnu -global-isel=0 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64-unknown-linux-gnu -global-isel=1 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
define fp128 @fabs_f128(fp128 %a) {
; CHECK-SD-LABEL: fabs_f128:
>From 2133a0e7703fbd7b3e22d0cb3413c3d1a7883945 Mon Sep 17 00:00:00 2001
From: Tianyi Guan <tguan at nvidia.com>
Date: Mon, 2 Sep 2024 17:15:31 +0100
Subject: [PATCH 8/8] Update legalizer-info-validation.mir
---
.../CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
index 87a415b45cca9a..b3b85090d11251 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
@@ -1,3 +1,4 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -mtriple=aarch64-- -run-pass=legalizer %s \
# RUN: -mcpu=cortex-a75 -o - 2>&1 | FileCheck %s --check-prefixes=CHECK
@@ -538,7 +539,6 @@
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: G_FABS (opcode {{[0-9]+}}): 1 type index, 0 imm indices
-# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: G_FCOPYSIGN (opcode {{[0-9]+}}): 2 type indices
More information about the llvm-commits mailing list