[llvm] [AArch64][GlobalISel] Legalize 128-bit types for FABS (PR #104753)

via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 3 02:17:19 PDT 2024


https://github.com/Him188 updated https://github.com/llvm/llvm-project/pull/104753

>From 2e386b20c9b55ea51560c2bd1eba0e0478e2fbc0 Mon Sep 17 00:00:00 2001
From: Tianyi Guan <tguan at nvidia.com>
Date: Tue, 30 Jul 2024 12:19:52 +0100
Subject: [PATCH 1/8] [AArch64][GlobalISel] Add test cases for 128-bit fabs.
 (NFC)

---
 llvm/test/CodeGen/AArch64/fabs-fp128.ll | 58 +++++++++++++++++++++++++
 1 file changed, 58 insertions(+)
 create mode 100644 llvm/test/CodeGen/AArch64/fabs-fp128.ll

diff --git a/llvm/test/CodeGen/AArch64/fabs-fp128.ll b/llvm/test/CodeGen/AArch64/fabs-fp128.ll
new file mode 100644
index 00000000000000..0232397ffc7c67
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/fabs-fp128.ll
@@ -0,0 +1,58 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64 -global-isel=1 -global-isel-abort=2 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+
+; CHECK: warning: Instruction selection used fallback path for fabs_f128
+; CHECK-NEXT: warning: Instruction selection used fallback path for fabs_v1f128
+; CHECK-NEXT: warning: Instruction selection used fallback path for fabs_v2f128
+
+define fp128 @fabs_f128(fp128 %a) {
+; CHECK-LABEL: fabs_f128:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    str q0, [sp, #-16]!
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    ldrb w8, [sp, #15]
+; CHECK-NEXT:    and w8, w8, #0x7f
+; CHECK-NEXT:    strb w8, [sp, #15]
+; CHECK-NEXT:    ldr q0, [sp], #16
+; CHECK-NEXT:    ret
+entry:
+  %c = call fp128 @llvm.fabs.f128(fp128 %a)
+  ret fp128 %c
+}
+
+define <1 x fp128> @fabs_v1f128(<1 x fp128> %a) {
+; CHECK-LABEL: fabs_v1f128:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    str q0, [sp, #-16]!
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    ldrb w8, [sp, #15]
+; CHECK-NEXT:    and w8, w8, #0x7f
+; CHECK-NEXT:    strb w8, [sp, #15]
+; CHECK-NEXT:    ldr q0, [sp], #16
+; CHECK-NEXT:    ret
+entry:
+  %c = call <1 x fp128> @llvm.fabs.v1f128(<1 x fp128> %a)
+  ret <1 x fp128> %c
+}
+
+define <2 x fp128> @fabs_v2f128(<2 x fp128> %a) {
+; CHECK-LABEL: fabs_v2f128:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    stp q0, q1, [sp, #-32]!
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    ldrb w8, [sp, #15]
+; CHECK-NEXT:    and w8, w8, #0x7f
+; CHECK-NEXT:    strb w8, [sp, #15]
+; CHECK-NEXT:    ldrb w8, [sp, #31]
+; CHECK-NEXT:    and w8, w8, #0x7f
+; CHECK-NEXT:    strb w8, [sp, #31]
+; CHECK-NEXT:    ldp q0, q1, [sp], #32
+; CHECK-NEXT:    ret
+entry:
+  %c = call <2 x fp128> @llvm.fabs.v2f128(<2 x fp128> %a)
+  ret <2 x fp128> %c
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-GI: {{.*}}
+; CHECK-SD: {{.*}}

>From c4a8d3845756019dd3f9b8dc00ad32ba61768702 Mon Sep 17 00:00:00 2001
From: Tianyi Guan <tguan at nvidia.com>
Date: Tue, 30 Jul 2024 12:19:52 +0100
Subject: [PATCH 2/8] [AArch64][GlobalISel] Legalize 128-bit types for FABS

- Generate AND to clear sign bit for s128
- Vectors are scalarized
---
 .../AArch64/GISel/AArch64LegalizerInfo.cpp    |  41 ++++++-
 .../AArch64/GISel/AArch64LegalizerInfo.h      |   2 +
 llvm/test/CodeGen/AArch64/fabs-fp128.ll       | 100 +++++++++++-------
 3 files changed, 102 insertions(+), 41 deletions(-)

diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 7eaf6a84bd204f..9a7b9e143917f6 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -242,9 +242,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
       .widenScalarToNextPow2(0);
 
   getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FNEG,
-                               G_FABS, G_FSQRT, G_FMAXNUM, G_FMINNUM,
-                               G_FMAXIMUM, G_FMINIMUM, G_FCEIL, G_FFLOOR,
-                               G_FRINT, G_FNEARBYINT, G_INTRINSIC_TRUNC,
+                               G_FSQRT, G_FMAXNUM, G_FMINNUM, G_FMAXIMUM,
+                               G_FMINIMUM, G_FCEIL, G_FFLOOR, G_FRINT,
+                               G_FNEARBYINT, G_INTRINSIC_TRUNC,
                                G_INTRINSIC_ROUND, G_INTRINSIC_ROUNDEVEN})
       .legalFor({MinFPScalar, s32, s64, v2s32, v4s32, v2s64})
       .legalIf([=](const LegalityQuery &Query) {
@@ -258,6 +258,20 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
       .clampNumElements(0, v2s64, v2s64)
       .moreElementsToNextPow2(0);
 
+  getActionDefinitionsBuilder(G_FABS)
+      .legalFor({MinFPScalar, s32, s64, v2s32, v4s32, v2s64})
+      .legalIf([=](const LegalityQuery &Query) {
+        const auto &Ty = Query.Types[0];
+        return (Ty == v8s16 || Ty == v4s16) && HasFP16;
+      })
+      .customFor({s128})
+      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
+      .minScalarOrElt(0, MinFPScalar)
+      .clampNumElements(0, v4s16, v8s16)
+      .clampNumElements(0, v2s32, v4s32)
+      .clampNumElements(0, v2s64, v2s64)
+      .moreElementsToNextPow2(0);
+
   getActionDefinitionsBuilder(G_FREM)
       .libcallFor({s32, s64})
       .minScalar(0, s32)
@@ -1346,6 +1360,8 @@ bool AArch64LegalizerInfo::legalizeCustom(
     return legalizePrefetch(MI, Helper);
   case TargetOpcode::G_ABS:
     return Helper.lowerAbsToCNeg(MI);
+  case TargetOpcode::G_FABS:
+    return legalizeFABS(MI, MRI, MIRBuilder);
   case TargetOpcode::G_ICMP:
     return legalizeICMP(MI, MRI, MIRBuilder);
   }
@@ -1406,6 +1422,25 @@ bool AArch64LegalizerInfo::legalizeFunnelShift(MachineInstr &MI,
   return true;
 }
 
+bool AArch64LegalizerInfo::legalizeFABS(MachineInstr &MI,
+                                        MachineRegisterInfo &MRI,
+                                        MachineIRBuilder &MIRBuilder) const {
+  Register SrcReg = MI.getOperand(1).getReg();
+  Register DstReg = MI.getOperand(0).getReg();
+
+  constexpr LLT S128 = LLT::scalar(128);
+  if (MRI.getType(SrcReg) != S128 || MRI.getType(DstReg) != S128)
+    return false;
+
+  MIRBuilder.buildAnd(
+      DstReg, SrcReg,
+      MIRBuilder.buildConstant(
+          S128, APInt::getSignedMaxValue(128)));
+
+  MI.eraseFromParent();
+  return true;
+}
+
 bool AArch64LegalizerInfo::legalizeICMP(MachineInstr &MI,
                                         MachineRegisterInfo &MRI,
                                         MachineIRBuilder &MIRBuilder) const {
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
index 00d85a36e4b2ca..8bf642d1745aa9 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
@@ -50,6 +50,8 @@ class AArch64LegalizerInfo : public LegalizerInfo {
                                LegalizerHelper &Helper) const;
   bool legalizeRotate(MachineInstr &MI, MachineRegisterInfo &MRI,
                       LegalizerHelper &Helper) const;
+  bool legalizeFABS(MachineInstr &MI, MachineRegisterInfo &MRI,
+                    MachineIRBuilder &MIRBuilder) const;
   bool legalizeICMP(MachineInstr &MI, MachineRegisterInfo &MRI,
                     MachineIRBuilder &MIRBuilder) const;
   bool legalizeFunnelShift(MachineInstr &MI, MachineRegisterInfo &MRI,
diff --git a/llvm/test/CodeGen/AArch64/fabs-fp128.ll b/llvm/test/CodeGen/AArch64/fabs-fp128.ll
index 0232397ffc7c67..131af5e0a3281e 100644
--- a/llvm/test/CodeGen/AArch64/fabs-fp128.ll
+++ b/llvm/test/CodeGen/AArch64/fabs-fp128.ll
@@ -1,58 +1,82 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
-; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
-; RUN: llc -mtriple=aarch64 -global-isel=1 -global-isel-abort=2 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
-
-; CHECK: warning: Instruction selection used fallback path for fabs_f128
-; CHECK-NEXT: warning: Instruction selection used fallback path for fabs_v1f128
-; CHECK-NEXT: warning: Instruction selection used fallback path for fabs_v2f128
+; RUN: llc -mtriple=aarch64-unknown-linux-gnu -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64-unknown-linux-gnu -global-isel=1 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
 define fp128 @fabs_f128(fp128 %a) {
-; CHECK-LABEL: fabs_f128:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    str q0, [sp, #-16]!
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    ldrb w8, [sp, #15]
-; CHECK-NEXT:    and w8, w8, #0x7f
-; CHECK-NEXT:    strb w8, [sp, #15]
-; CHECK-NEXT:    ldr q0, [sp], #16
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fabs_f128:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    str q0, [sp, #-16]!
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT:    ldrb w8, [sp, #15]
+; CHECK-SD-NEXT:    and w8, w8, #0x7f
+; CHECK-SD-NEXT:    strb w8, [sp, #15]
+; CHECK-SD-NEXT:    ldr q0, [sp], #16
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fabs_f128:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    mov x8, v0.d[1]
+; CHECK-GI-NEXT:    mov v0.d[0], v0.d[0]
+; CHECK-GI-NEXT:    and x8, x8, #0x7fffffffffffffff
+; CHECK-GI-NEXT:    mov v0.d[1], x8
+; CHECK-GI-NEXT:    ret
 entry:
   %c = call fp128 @llvm.fabs.f128(fp128 %a)
   ret fp128 %c
 }
 
 define <1 x fp128> @fabs_v1f128(<1 x fp128> %a) {
-; CHECK-LABEL: fabs_v1f128:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    str q0, [sp, #-16]!
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    ldrb w8, [sp, #15]
-; CHECK-NEXT:    and w8, w8, #0x7f
-; CHECK-NEXT:    strb w8, [sp, #15]
-; CHECK-NEXT:    ldr q0, [sp], #16
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fabs_v1f128:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    str q0, [sp, #-16]!
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT:    ldrb w8, [sp, #15]
+; CHECK-SD-NEXT:    and w8, w8, #0x7f
+; CHECK-SD-NEXT:    strb w8, [sp, #15]
+; CHECK-SD-NEXT:    ldr q0, [sp], #16
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fabs_v1f128:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    mov x8, v0.d[1]
+; CHECK-GI-NEXT:    mov v0.d[0], v0.d[0]
+; CHECK-GI-NEXT:    and x8, x8, #0x7fffffffffffffff
+; CHECK-GI-NEXT:    mov v0.d[1], x8
+; CHECK-GI-NEXT:    ret
 entry:
   %c = call <1 x fp128> @llvm.fabs.v1f128(<1 x fp128> %a)
   ret <1 x fp128> %c
 }
 
 define <2 x fp128> @fabs_v2f128(<2 x fp128> %a) {
-; CHECK-LABEL: fabs_v2f128:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    stp q0, q1, [sp, #-32]!
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-NEXT:    ldrb w8, [sp, #15]
-; CHECK-NEXT:    and w8, w8, #0x7f
-; CHECK-NEXT:    strb w8, [sp, #15]
-; CHECK-NEXT:    ldrb w8, [sp, #31]
-; CHECK-NEXT:    and w8, w8, #0x7f
-; CHECK-NEXT:    strb w8, [sp, #31]
-; CHECK-NEXT:    ldp q0, q1, [sp], #32
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fabs_v2f128:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    stp q0, q1, [sp, #-32]!
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT:    ldrb w8, [sp, #15]
+; CHECK-SD-NEXT:    and w8, w8, #0x7f
+; CHECK-SD-NEXT:    strb w8, [sp, #15]
+; CHECK-SD-NEXT:    ldrb w8, [sp, #31]
+; CHECK-SD-NEXT:    and w8, w8, #0x7f
+; CHECK-SD-NEXT:    strb w8, [sp, #31]
+; CHECK-SD-NEXT:    ldp q0, q1, [sp], #32
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fabs_v2f128:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    mov x8, v0.d[1]
+; CHECK-GI-NEXT:    mov x9, v1.d[1]
+; CHECK-GI-NEXT:    mov v0.d[0], v0.d[0]
+; CHECK-GI-NEXT:    mov v1.d[0], v1.d[0]
+; CHECK-GI-NEXT:    and x8, x8, #0x7fffffffffffffff
+; CHECK-GI-NEXT:    and x9, x9, #0x7fffffffffffffff
+; CHECK-GI-NEXT:    mov v0.d[1], x8
+; CHECK-GI-NEXT:    mov v1.d[1], x9
+; CHECK-GI-NEXT:    ret
 entry:
   %c = call <2 x fp128> @llvm.fabs.v2f128(<2 x fp128> %a)
   ret <2 x fp128> %c
 }
+
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CHECK-GI: {{.*}}
-; CHECK-SD: {{.*}}
+; CHECK: {{.*}}

>From eaa567d56bc2bbf6c42c8cbcbc5869059daabab2 Mon Sep 17 00:00:00 2001
From: Tianyi Guan <tguan at nvidia.com>
Date: Mon, 19 Aug 2024 10:58:02 +0100
Subject: [PATCH 3/8] Generalize lowerFAbs in LegalizerHelper

---
 .../llvm/CodeGen/GlobalISel/LegalizerHelper.h |  1 +
 .../CodeGen/GlobalISel/LegalizerHelper.cpp    | 22 ++++++++++++++++++
 .../AArch64/GISel/AArch64LegalizerInfo.cpp    | 23 +------------------
 .../AArch64/GISel/AArch64LegalizerInfo.h      |  2 --
 4 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
index 0b2cd299bde12a..afd68250f5ca6e 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -437,6 +437,7 @@ class LegalizerHelper {
   LegalizeResult lowerAbsToAddXor(MachineInstr &MI);
   LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI);
   LegalizeResult lowerAbsToCNeg(MachineInstr &MI);
+  LegalizeResult lowerFAbs(MachineInstr &MI);
   LegalizeResult lowerVectorReduction(MachineInstr &MI);
   LegalizeResult lowerMemcpyInline(MachineInstr &MI);
   LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen = 0);
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 3fece81df1f2fd..8c142f084fb341 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -4255,6 +4255,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
     return lowerShlSat(MI);
   case G_ABS:
     return lowerAbsToAddXor(MI);
+  case G_FABS:
+    return lowerFAbs(MI);
   case G_SELECT:
     return lowerSelect(MI);
   case G_IS_FPCLASS:
@@ -8761,6 +8763,26 @@ LegalizerHelper::lowerAbsToCNeg(MachineInstr &MI) {
   return Legalized;
 }
 
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerFAbs(MachineInstr &MI) {
+  Register SrcReg = MI.getOperand(1).getReg();
+  Register DstReg = MI.getOperand(0).getReg();
+
+  LLT Ty = MRI.getType(DstReg);
+  if (MRI.getType(SrcReg) != Ty)
+    return UnableToLegalize;
+
+  if (!Ty.isScalar())
+    return UnableToLegalize;
+
+  // Reset sign bit
+  MIRBuilder.buildAnd(DstReg, SrcReg,
+                      MIRBuilder.buildConstant(
+                          Ty, APInt::getSignedMaxValue(Ty.getSizeInBits())));
+
+  MI.eraseFromParent();
+  return Legalized;
+}
+
 LegalizerHelper::LegalizeResult
 LegalizerHelper::lowerVectorReduction(MachineInstr &MI) {
   Register SrcReg = MI.getOperand(1).getReg();
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 9a7b9e143917f6..64a035b356396f 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -264,7 +264,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
         const auto &Ty = Query.Types[0];
         return (Ty == v8s16 || Ty == v4s16) && HasFP16;
       })
-      .customFor({s128})
+      .lowerFor({s128})
       .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
       .minScalarOrElt(0, MinFPScalar)
       .clampNumElements(0, v4s16, v8s16)
@@ -1360,8 +1360,6 @@ bool AArch64LegalizerInfo::legalizeCustom(
     return legalizePrefetch(MI, Helper);
   case TargetOpcode::G_ABS:
     return Helper.lowerAbsToCNeg(MI);
-  case TargetOpcode::G_FABS:
-    return legalizeFABS(MI, MRI, MIRBuilder);
   case TargetOpcode::G_ICMP:
     return legalizeICMP(MI, MRI, MIRBuilder);
   }
@@ -1422,25 +1420,6 @@ bool AArch64LegalizerInfo::legalizeFunnelShift(MachineInstr &MI,
   return true;
 }
 
-bool AArch64LegalizerInfo::legalizeFABS(MachineInstr &MI,
-                                        MachineRegisterInfo &MRI,
-                                        MachineIRBuilder &MIRBuilder) const {
-  Register SrcReg = MI.getOperand(1).getReg();
-  Register DstReg = MI.getOperand(0).getReg();
-
-  constexpr LLT S128 = LLT::scalar(128);
-  if (MRI.getType(SrcReg) != S128 || MRI.getType(DstReg) != S128)
-    return false;
-
-  MIRBuilder.buildAnd(
-      DstReg, SrcReg,
-      MIRBuilder.buildConstant(
-          S128, APInt::getSignedMaxValue(128)));
-
-  MI.eraseFromParent();
-  return true;
-}
-
 bool AArch64LegalizerInfo::legalizeICMP(MachineInstr &MI,
                                         MachineRegisterInfo &MRI,
                                         MachineIRBuilder &MIRBuilder) const {
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
index 8bf642d1745aa9..00d85a36e4b2ca 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
@@ -50,8 +50,6 @@ class AArch64LegalizerInfo : public LegalizerInfo {
                                LegalizerHelper &Helper) const;
   bool legalizeRotate(MachineInstr &MI, MachineRegisterInfo &MRI,
                       LegalizerHelper &Helper) const;
-  bool legalizeFABS(MachineInstr &MI, MachineRegisterInfo &MRI,
-                    MachineIRBuilder &MIRBuilder) const;
   bool legalizeICMP(MachineInstr &MI, MachineRegisterInfo &MRI,
                     MachineIRBuilder &MIRBuilder) const;
   bool legalizeFunnelShift(MachineInstr &MI, MachineRegisterInfo &MRI,

>From b8e3193a853071bdefa87d0a4aeaab3ea0979a0b Mon Sep 17 00:00:00 2001
From: Tianyi Guan <tguan at nvidia.com>
Date: Tue, 20 Aug 2024 09:27:17 +0100
Subject: [PATCH 4/8] Remove unnecessary checks

---
 llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 8c142f084fb341..79b8cc9038a096 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -8768,11 +8768,6 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerFAbs(MachineInstr &MI) {
   Register DstReg = MI.getOperand(0).getReg();
 
   LLT Ty = MRI.getType(DstReg);
-  if (MRI.getType(SrcReg) != Ty)
-    return UnableToLegalize;
-
-  if (!Ty.isScalar())
-    return UnableToLegalize;
 
   // Reset sign bit
   MIRBuilder.buildAnd(DstReg, SrcReg,

>From b85fd556bbd7ecc05f184c7b4988a926f779d12d Mon Sep 17 00:00:00 2001
From: Tianyi Guan <tguan at nvidia.com>
Date: Fri, 23 Aug 2024 10:36:21 +0100
Subject: [PATCH 5/8] Apply suggestions: - Support vectors in lowerFAbs -
 Simplify legality predicates

---
 .../CodeGen/GlobalISel/LegalizerHelper.cpp    |  7 +-
 .../AArch64/GISel/AArch64LegalizerInfo.cpp    | 17 ++--
 llvm/test/CodeGen/AArch64/fabs-fp128.ll       | 88 +++++++++++++++++++
 3 files changed, 104 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 79b8cc9038a096..776517e9aa8837 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -8770,9 +8770,10 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerFAbs(MachineInstr &MI) {
   LLT Ty = MRI.getType(DstReg);
 
   // Reset sign bit
-  MIRBuilder.buildAnd(DstReg, SrcReg,
-                      MIRBuilder.buildConstant(
-                          Ty, APInt::getSignedMaxValue(Ty.getSizeInBits())));
+  MIRBuilder.buildAnd(
+      DstReg, SrcReg,
+      MIRBuilder.buildConstant(
+          Ty, APInt::getSignedMaxValue(Ty.getScalarSizeInBits())));
 
   MI.eraseFromParent();
   return Legalized;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 64a035b356396f..52b39a6bf07c70 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -89,6 +89,13 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
   const bool HasFP16 = ST.hasFullFP16();
   const LLT &MinFPScalar = HasFP16 ? s16 : s32;
 
+  // A legality predicate that returns true if the subtarget has FP16 support.
+  // To be used in combination with other predicates, e.g:
+  //     .legalIf(all(hasFP16(), typeInSet(0, {v8s16, v4s16})))
+  const auto hasFP16 = [=]() -> LegalityPredicate {
+    return [=](const LegalityQuery &) { return HasFP16; };
+  };
+
   const bool HasCSSC = ST.hasCSSC();
   const bool HasRCPC3 = ST.hasRCPC3();
 
@@ -260,12 +267,12 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
 
   getActionDefinitionsBuilder(G_FABS)
       .legalFor({MinFPScalar, s32, s64, v2s32, v4s32, v2s64})
-      .legalIf([=](const LegalityQuery &Query) {
-        const auto &Ty = Query.Types[0];
-        return (Ty == v8s16 || Ty == v4s16) && HasFP16;
-      })
-      .lowerFor({s128})
+      .legalIf(all(hasFP16(), typeInSet(0, {v8s16, v4s16})))
+      // TODO: Lower supports 128-bit types but G_AND generated by Lower does
+      // not yet.
+      //       When it does, we can remove scalarizeIf.
       .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
+      .lowerIf(scalarOrEltWiderThan(0, 64))
       .minScalarOrElt(0, MinFPScalar)
       .clampNumElements(0, v4s16, v8s16)
       .clampNumElements(0, v2s32, v4s32)
diff --git a/llvm/test/CodeGen/AArch64/fabs-fp128.ll b/llvm/test/CodeGen/AArch64/fabs-fp128.ll
index 131af5e0a3281e..36d64d72b207a2 100644
--- a/llvm/test/CodeGen/AArch64/fabs-fp128.ll
+++ b/llvm/test/CodeGen/AArch64/fabs-fp128.ll
@@ -78,5 +78,93 @@ entry:
   ret <2 x fp128> %c
 }
 
+define <3 x fp128> @fabs_v3f128(<3 x fp128> %a) {
+; CHECK-SD-LABEL: fabs_v3f128:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    stp q0, q1, [sp, #-48]!
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-SD-NEXT:    ldrb w8, [sp, #15]
+; CHECK-SD-NEXT:    str q2, [sp, #32]
+; CHECK-SD-NEXT:    and w8, w8, #0x7f
+; CHECK-SD-NEXT:    strb w8, [sp, #15]
+; CHECK-SD-NEXT:    ldrb w8, [sp, #31]
+; CHECK-SD-NEXT:    and w8, w8, #0x7f
+; CHECK-SD-NEXT:    strb w8, [sp, #31]
+; CHECK-SD-NEXT:    ldrb w8, [sp, #47]
+; CHECK-SD-NEXT:    ldp q0, q1, [sp]
+; CHECK-SD-NEXT:    and w8, w8, #0x7f
+; CHECK-SD-NEXT:    strb w8, [sp, #47]
+; CHECK-SD-NEXT:    ldr q2, [sp, #32]
+; CHECK-SD-NEXT:    add sp, sp, #48
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fabs_v3f128:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    mov x8, v0.d[1]
+; CHECK-GI-NEXT:    mov x9, v1.d[1]
+; CHECK-GI-NEXT:    mov x10, v2.d[1]
+; CHECK-GI-NEXT:    mov v0.d[0], v0.d[0]
+; CHECK-GI-NEXT:    mov v1.d[0], v1.d[0]
+; CHECK-GI-NEXT:    mov v2.d[0], v2.d[0]
+; CHECK-GI-NEXT:    and x8, x8, #0x7fffffffffffffff
+; CHECK-GI-NEXT:    and x9, x9, #0x7fffffffffffffff
+; CHECK-GI-NEXT:    and x10, x10, #0x7fffffffffffffff
+; CHECK-GI-NEXT:    mov v0.d[1], x8
+; CHECK-GI-NEXT:    mov v1.d[1], x9
+; CHECK-GI-NEXT:    mov v2.d[1], x10
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = call <3 x fp128> @llvm.fabs.v3f128(<3 x fp128> %a)
+  ret <3 x fp128> %c
+}
+
+define <4 x fp128> @fabs_v4f128(<4 x fp128> %a) {
+; CHECK-SD-LABEL: fabs_v4f128:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    stp q0, q1, [sp, #-64]!
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-SD-NEXT:    ldrb w8, [sp, #15]
+; CHECK-SD-NEXT:    stp q2, q3, [sp, #32]
+; CHECK-SD-NEXT:    and w8, w8, #0x7f
+; CHECK-SD-NEXT:    strb w8, [sp, #15]
+; CHECK-SD-NEXT:    ldrb w8, [sp, #31]
+; CHECK-SD-NEXT:    and w8, w8, #0x7f
+; CHECK-SD-NEXT:    strb w8, [sp, #31]
+; CHECK-SD-NEXT:    ldrb w8, [sp, #47]
+; CHECK-SD-NEXT:    ldp q0, q1, [sp]
+; CHECK-SD-NEXT:    and w8, w8, #0x7f
+; CHECK-SD-NEXT:    strb w8, [sp, #47]
+; CHECK-SD-NEXT:    ldrb w8, [sp, #63]
+; CHECK-SD-NEXT:    and w8, w8, #0x7f
+; CHECK-SD-NEXT:    strb w8, [sp, #63]
+; CHECK-SD-NEXT:    ldp q2, q3, [sp, #32]
+; CHECK-SD-NEXT:    add sp, sp, #64
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fabs_v4f128:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    mov x8, v0.d[1]
+; CHECK-GI-NEXT:    mov v7.d[0], v0.d[0]
+; CHECK-GI-NEXT:    mov x9, v1.d[1]
+; CHECK-GI-NEXT:    mov x10, v2.d[1]
+; CHECK-GI-NEXT:    mov x11, v3.d[1]
+; CHECK-GI-NEXT:    mov v1.d[0], v1.d[0]
+; CHECK-GI-NEXT:    mov v2.d[0], v2.d[0]
+; CHECK-GI-NEXT:    mov v3.d[0], v3.d[0]
+; CHECK-GI-NEXT:    and x8, x8, #0x7fffffffffffffff
+; CHECK-GI-NEXT:    mov v7.d[1], x8
+; CHECK-GI-NEXT:    and x8, x9, #0x7fffffffffffffff
+; CHECK-GI-NEXT:    and x9, x10, #0x7fffffffffffffff
+; CHECK-GI-NEXT:    and x10, x11, #0x7fffffffffffffff
+; CHECK-GI-NEXT:    mov v1.d[1], x8
+; CHECK-GI-NEXT:    mov v2.d[1], x9
+; CHECK-GI-NEXT:    mov v3.d[1], x10
+; CHECK-GI-NEXT:    mov v0.16b, v7.16b
+; CHECK-GI-NEXT:    ret
+entry:
+  %c = call <4 x fp128> @llvm.fabs.v4f128(<4 x fp128> %a)
+  ret <4 x fp128> %c
+}
+
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; CHECK: {{.*}}

>From c82d6507a492c5510f38c7c5fdae7f57d0aa6845 Mon Sep 17 00:00:00 2001
From: Tianyi Guan <tguan at nvidia.com>
Date: Thu, 29 Aug 2024 11:31:16 +0100
Subject: [PATCH 6/8] Rollback hasFP16

---
 .../Target/AArch64/GISel/AArch64LegalizerInfo.cpp | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 52b39a6bf07c70..db5cd1d32d73d0 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -89,13 +89,6 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
   const bool HasFP16 = ST.hasFullFP16();
   const LLT &MinFPScalar = HasFP16 ? s16 : s32;
 
-  // A legality predicate that returns true if the subtarget has FP16 support.
-  // To be used in combination with other predicates, e.g:
-  //     .legalIf(all(hasFP16(), typeInSet(0, {v8s16, v4s16})))
-  const auto hasFP16 = [=]() -> LegalityPredicate {
-    return [=](const LegalityQuery &) { return HasFP16; };
-  };
-
   const bool HasCSSC = ST.hasCSSC();
   const bool HasRCPC3 = ST.hasRCPC3();
 
@@ -267,10 +260,10 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
 
   getActionDefinitionsBuilder(G_FABS)
       .legalFor({MinFPScalar, s32, s64, v2s32, v4s32, v2s64})
-      .legalIf(all(hasFP16(), typeInSet(0, {v8s16, v4s16})))
-      // TODO: Lower supports 128-bit types but G_AND generated by Lower does
-      // not yet.
-      //       When it does, we can remove scalarizeIf.
+      .legalIf([=](const LegalityQuery &Query) {
+        const auto &Ty = Query.Types[0];
+        return (Ty == v8s16 || Ty == v4s16) && HasFP16;
+      })
       .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
       .lowerIf(scalarOrEltWiderThan(0, 64))
       .minScalarOrElt(0, MinFPScalar)

>From dd106e18aaf256db1cbba635b2fb5d8f0a9ba3c1 Mon Sep 17 00:00:00 2001
From: Tianyi Guan <tguan at nvidia.com>
Date: Thu, 29 Aug 2024 11:33:17 +0100
Subject: [PATCH 7/8] Clarify test run lines

---
 llvm/test/CodeGen/AArch64/fabs-fp128.ll | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/test/CodeGen/AArch64/fabs-fp128.ll b/llvm/test/CodeGen/AArch64/fabs-fp128.ll
index 36d64d72b207a2..903aa8adf70851 100644
--- a/llvm/test/CodeGen/AArch64/fabs-fp128.ll
+++ b/llvm/test/CodeGen/AArch64/fabs-fp128.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
-; RUN: llc -mtriple=aarch64-unknown-linux-gnu -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
-; RUN: llc -mtriple=aarch64-unknown-linux-gnu -global-isel=1 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+; RUN: llc -mtriple=aarch64-unknown-linux-gnu -global-isel=0 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64-unknown-linux-gnu -global-isel=1 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
 define fp128 @fabs_f128(fp128 %a) {
 ; CHECK-SD-LABEL: fabs_f128:

>From 2133a0e7703fbd7b3e22d0cb3413c3d1a7883945 Mon Sep 17 00:00:00 2001
From: Tianyi Guan <tguan at nvidia.com>
Date: Mon, 2 Sep 2024 17:15:31 +0100
Subject: [PATCH 8/8] Update legalizer-info-validation.mir

---
 .../CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
index 87a415b45cca9a..b3b85090d11251 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
@@ -1,3 +1,4 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
 # RUN: llc -mtriple=aarch64-- -run-pass=legalizer %s \
 # RUN:     -mcpu=cortex-a75 -o - 2>&1 | FileCheck %s --check-prefixes=CHECK
 
@@ -538,7 +539,6 @@
 # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
 # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
 # DEBUG-NEXT: G_FABS (opcode {{[0-9]+}}): 1 type index, 0 imm indices
-# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
 # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
 # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
 # DEBUG-NEXT: G_FCOPYSIGN (opcode {{[0-9]+}}): 2 type indices



More information about the llvm-commits mailing list