[llvm] [AArch64][GlobalISel] Add fp128 and i128 fptosi/fptoui handling. (PR #95528)

via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 14 04:13:23 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-globalisel

Author: David Green (davemgreen)

<details>
<summary>Changes</summary>

Any fp128 conversion needs to end up as a libcall, as will f32->i128 and f64->i128. f16 is a bit special: the maximum range of the result fits in an i17, so the destination can be shrunk to an i64. Vectors with i128/fp128 element types are scalarized.

---

Patch is 131.43 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/95528.diff


5 Files Affected:

- (modified) llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp (+8-6) 
- (modified) llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp (+28-10) 
- (removed) llvm/test/CodeGen/AArch64/GlobalISel/gisel-fail-intermediate-legalizer.ll (-8) 
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/select-fp-casts.mir (-76) 
- (modified) llvm/test/CodeGen/AArch64/fptoi.ll (+1793-974) 


``````````diff
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 9830b521797c1..db950f19f4580 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -1125,13 +1125,15 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
     // FIXME: Support other types
     unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
     unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
-    if ((ToSize != 32 && ToSize != 64) || (FromSize != 32 && FromSize != 64))
+    if ((ToSize != 32 && ToSize != 64 && ToSize != 128) ||
+        (FromSize != 32 && FromSize != 64 && FromSize != 128))
       return UnableToLegalize;
-    LegalizeResult Status = conversionLibcall(
-        MI, MIRBuilder,
-        ToSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx),
-        FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
-        LocObserver);
+    LegalizeResult Status =
+        conversionLibcall(MI, MIRBuilder, Type::getIntNTy(Ctx, ToSize),
+                          FromSize == 128  ? Type::getFP128Ty(Ctx)
+                          : FromSize == 64 ? Type::getDoubleTy(Ctx)
+                                           : Type::getFloatTy(Ctx),
+                          LocObserver);
     if (Status != Legalized)
       return Status;
     break;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 42cd43c3afa37..fef0b722efe45 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -661,7 +661,13 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
 
   // Conversions
   getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
-      .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
+      .legalFor({{s32, s32},
+                 {s64, s32},
+                 {s32, s64},
+                 {s64, s64},
+                 {v2s64, v2s64},
+                 {v4s32, v4s32},
+                 {v2s32, v2s32}})
       .legalIf([=](const LegalityQuery &Query) {
         return HasFP16 &&
                (Query.Types[1] == s16 || Query.Types[1] == v4s16 ||
@@ -669,26 +675,38 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
                (Query.Types[0] == s32 || Query.Types[0] == s64 ||
                 Query.Types[0] == v4s16 || Query.Types[0] == v8s16);
       })
-      .widenScalarToNextPow2(0)
-      .clampScalar(0, s32, s64)
-      .widenScalarToNextPow2(1)
-      .clampScalarOrElt(1, MinFPScalar, s64)
+      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
+      .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
+      // The range of a fp16 value fits into an i17, so we can lower the width
+      // to i64.
+      .narrowScalarIf(
+          [=](const LegalityQuery &Query) {
+            return Query.Types[1] == s16 && Query.Types[0].getSizeInBits() > 64;
+          },
+          changeTo(0, s64))
       .moreElementsToNextPow2(0)
+      .widenScalarOrEltToNextPow2OrMinSize(0)
+      .minScalar(0, s32)
+      .widenScalarOrEltToNextPow2OrMinSize(1, /*MinSize=*/HasFP16 ? 16 : 32)
       .widenScalarIf(
           [=](const LegalityQuery &Query) {
-            return Query.Types[0].getScalarSizeInBits() >
-                   Query.Types[1].getScalarSizeInBits();
+            return Query.Types[0].getScalarSizeInBits() <= 64 &&
+                   Query.Types[0].getScalarSizeInBits() >
+                       Query.Types[1].getScalarSizeInBits();
           },
           LegalizeMutations::changeElementSizeTo(1, 0))
       .widenScalarIf(
           [=](const LegalityQuery &Query) {
-            return Query.Types[0].getScalarSizeInBits() <
-                   Query.Types[1].getScalarSizeInBits();
+            return Query.Types[1].getScalarSizeInBits() <= 64 &&
+                   Query.Types[0].getScalarSizeInBits() <
+                       Query.Types[1].getScalarSizeInBits();
           },
           LegalizeMutations::changeElementSizeTo(0, 1))
       .clampNumElements(0, v4s16, v8s16)
       .clampNumElements(0, v2s32, v4s32)
-      .clampMaxNumElements(0, s64, 2);
+      .clampMaxNumElements(0, s64, 2)
+      .libcallFor(
+          {{s32, s128}, {s64, s128}, {s128, s128}, {s128, s32}, {s128, s64}});
 
   getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
       .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/gisel-fail-intermediate-legalizer.ll b/llvm/test/CodeGen/AArch64/GlobalISel/gisel-fail-intermediate-legalizer.ll
deleted file mode 100644
index e5ca0d41fc549..0000000000000
--- a/llvm/test/CodeGen/AArch64/GlobalISel/gisel-fail-intermediate-legalizer.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-;RUN: llc -mtriple=aarch64-unknown-unknown -o - -global-isel -global-isel-abort=2 %s 2>&1 | FileCheck %s
-; CHECK: fallback
-; CHECK-LABEL: foo
-define i16 @foo(ptr %p) {
-  %tmp0 = load fp128, ptr %p
-  %tmp1 = fptoui fp128 %tmp0 to i16
-  ret i16 %tmp1
-}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-fp-casts.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-fp-casts.mir
index a9afc61cb42a6..a3094225a031a 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-fp-casts.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-fp-casts.mir
@@ -711,79 +711,3 @@ body:             |
     %1:fpr(<2 x s32>) = G_UITOFP %0
     $d0 = COPY %1(<2 x s32>)
 ...
-
----
-name:            fptosi_v2s64_v2s32
-legalized:       true
-regBankSelected: true
-
-body:             |
-  bb.0:
-    liveins: $d0
-
-    ; CHECK-LABEL: name: fptosi_v2s64_v2s32
-    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
-    ; CHECK: [[FCVTLv2i32_:%[0-9]+]]:fpr128 = nofpexcept FCVTLv2i32 [[COPY]]
-    ; CHECK: [[FCVTZSv2f64_:%[0-9]+]]:fpr128 = nofpexcept FCVTZSv2f64 [[FCVTLv2i32_]]
-    ; CHECK: $q0 = COPY [[FCVTZSv2f64_]]
-    %0:fpr(<2 x s32>) = COPY $d0
-    %1:fpr(<2 x s64>) = G_FPTOSI %0
-    $q0 = COPY %1(<2 x s64>)
-...
-
----
-name:            fptoui_v2s64_v2s32
-legalized:       true
-regBankSelected: true
-
-body:             |
-  bb.0:
-    liveins: $d0
-
-    ; CHECK-LABEL: name: fptoui_v2s64_v2s32
-    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
-    ; CHECK: [[FCVTLv2i32_:%[0-9]+]]:fpr128 = nofpexcept FCVTLv2i32 [[COPY]]
-    ; CHECK: [[FCVTZUv2f64_:%[0-9]+]]:fpr128 = nofpexcept FCVTZUv2f64 [[FCVTLv2i32_]]
-    ; CHECK: $q0 = COPY [[FCVTZUv2f64_]]
-    %0:fpr(<2 x s32>) = COPY $d0
-    %1:fpr(<2 x s64>) = G_FPTOUI %0
-    $q0 = COPY %1(<2 x s64>)
-...
-
----
-name:            fptosi_v2s32_v2s64
-legalized:       true
-regBankSelected: true
-
-body:             |
-  bb.0:
-    liveins: $q0
-
-    ; CHECK-LABEL: name: fptosi_v2s32_v2s64
-    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
-    ; CHECK: [[FCVTZSv2f64_:%[0-9]+]]:fpr128 = nofpexcept FCVTZSv2f64 [[COPY]]
-    ; CHECK: [[XTNv2i32_:%[0-9]+]]:fpr64 = XTNv2i32 [[FCVTZSv2f64_]]
-    ; CHECK: $d0 = COPY [[XTNv2i32_]]
-    %0:fpr(<2 x s64>) = COPY $q0
-    %1:fpr(<2 x s32>) = G_FPTOSI %0
-    $d0 = COPY %1(<2 x s32>)
-...
-
----
-name:            fptoui_v2s32_v2s64
-legalized:       true
-regBankSelected: true
-
-body:             |
-  bb.0:
-    liveins: $q0
-
-    ; CHECK-LABEL: name: fptoui_v2s32_v2s64
-    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
-    ; CHECK: [[FCVTZUv2f64_:%[0-9]+]]:fpr128 = nofpexcept FCVTZUv2f64 [[COPY]]
-    ; CHECK: [[XTNv2i32_:%[0-9]+]]:fpr64 = XTNv2i32 [[FCVTZUv2f64_]]
-    ; CHECK: $d0 = COPY [[XTNv2i32_]]
-    %0:fpr(<2 x s64>) = COPY $q0
-    %1:fpr(<2 x s32>) = G_FPTOUI %0
-    $d0 = COPY %1(<2 x s32>)
-...
diff --git a/llvm/test/CodeGen/AArch64/fptoi.ll b/llvm/test/CodeGen/AArch64/fptoi.ll
index 3b8054a635bcd..4723ac01d6021 100644
--- a/llvm/test/CodeGen/AArch64/fptoi.ll
+++ b/llvm/test/CodeGen/AArch64/fptoi.ll
@@ -1,55 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
 ; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-NOFP16
 ; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-FP16
-; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-NOFP16
-; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -global-isel -verify-machineinstrs -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16
-
-; CHECK-GI:       warning: Instruction selection used fallback path for fptos_f64_i128
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_f64_i128
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_f32_i128
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_f32_i128
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_f128_i64
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_f128_i64
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_f128_i32
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_f128_i32
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_f128_i16
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_f128_i16
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_f128_i8
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_f128_i8
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_f128_i128
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_f128_i128
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v2f64_v2i128
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v2f64_v2i128
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v3f64_v3i128
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v3f64_v3i128
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v2f32_v2i128
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v2f32_v2i128
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v3f32_v3i128
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v3f32_v3i128
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v2f16_v2i128
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v2f16_v2i128
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v3f16_v3i128
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v3f16_v3i128
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v2f128_v2i64
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v2f128_v2i64
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v3f128_v3i64
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v3f128_v3i64
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v2f128_v2i32
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v2f128_v2i32
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v3f128_v3i32
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v3f128_v3i32
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v2f128_v2i16
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v2f128_v2i16
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v3f128_v3i16
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v3f128_v3i16
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v2f128_v2i8
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v2f128_v2i8
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v3f128_v3i8
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v3f128_v3i8
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v2f128_v2i128
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v2f128_v2i128
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptos_v3f128_v3i128
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptou_v3f128_v3i128
+; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-NOFP16
+; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16
 
 define i64 @fptos_f64_i64(double %a) {
 ; CHECK-LABEL: fptos_f64_i64:
@@ -558,56 +511,72 @@ entry:
 }
 
 define i64 @fptos_f128_i64(fp128 %a) {
-; CHECK-LABEL: fptos_f128_i64:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w30, -16
-; CHECK-NEXT:    bl __fixtfdi
-; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptos_f128_i64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT:    .cfi_offset w30, -16
+; CHECK-SD-NEXT:    bl __fixtfdi
+; CHECK-SD-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptos_f128_i64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    b __fixtfdi
 entry:
   %c = fptosi fp128 %a to i64
   ret i64 %c
 }
 
 define i64 @fptou_f128_i64(fp128 %a) {
-; CHECK-LABEL: fptou_f128_i64:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w30, -16
-; CHECK-NEXT:    bl __fixunstfdi
-; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptou_f128_i64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT:    .cfi_offset w30, -16
+; CHECK-SD-NEXT:    bl __fixunstfdi
+; CHECK-SD-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptou_f128_i64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    b __fixunstfdi
 entry:
   %c = fptoui fp128 %a to i64
   ret i64 %c
 }
 
 define i32 @fptos_f128_i32(fp128 %a) {
-; CHECK-LABEL: fptos_f128_i32:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w30, -16
-; CHECK-NEXT:    bl __fixtfsi
-; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptos_f128_i32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT:    .cfi_offset w30, -16
+; CHECK-SD-NEXT:    bl __fixtfsi
+; CHECK-SD-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptos_f128_i32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    b __fixtfsi
 entry:
   %c = fptosi fp128 %a to i32
   ret i32 %c
 }
 
 define i32 @fptou_f128_i32(fp128 %a) {
-; CHECK-LABEL: fptou_f128_i32:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w30, -16
-; CHECK-NEXT:    bl __fixunstfsi
-; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptou_f128_i32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT:    .cfi_offset w30, -16
+; CHECK-SD-NEXT:    bl __fixunstfsi
+; CHECK-SD-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptou_f128_i32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    b __fixunstfsi
 entry:
   %c = fptoui fp128 %a to i32
   ret i32 %c
@@ -628,14 +597,23 @@ entry:
 }
 
 define i16 @fptou_f128_i16(fp128 %a) {
-; CHECK-LABEL: fptou_f128_i16:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w30, -16
-; CHECK-NEXT:    bl __fixtfsi
-; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptou_f128_i16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT:    .cfi_offset w30, -16
+; CHECK-SD-NEXT:    bl __fixtfsi
+; CHECK-SD-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptou_f128_i16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl __fixunstfsi
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptoui fp128 %a to i16
   ret i16 %c
@@ -656,14 +634,23 @@ entry:
 }
 
 define i8 @fptou_f128_i8(fp128 %a) {
-; CHECK-LABEL: fptou_f128_i8:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w30, -16
-; CHECK-NEXT:    bl __fixtfsi
-; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptou_f128_i8:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT:    .cfi_offset w30, -16
+; CHECK-SD-NEXT:    bl __fixtfsi
+; CHECK-SD-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fptou_f128_i8:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT:    .cfi_offset w30, -16
+; CHECK-GI-NEXT:    bl __fixunstfsi
+; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT:    ret
 entry:
   %c = fptoui fp128 %a to i8
   ret i8 %c
@@ -2290,152 +2277,278 @@ entry:
 }
 
 define <2 x i128> @fptos_v2f64_v2i128(<2 x double> %a) {
-; CHECK-LABEL: fptos_v2f64_v2i128:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    sub sp, sp, #48
-; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
-; CHECK-NEXT:    stp x20, x19, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 48
-; CHECK-NEXT:    .cfi_offset w19, -8
-; CHECK-NEXT:    .cfi_offset w20, -16
-; CHECK-NEXT:    .cfi_offset w30, -32
-; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT:    mov d0, v0.d[1]
-; CHECK-NEXT:    bl __fixdfti
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    mov x19, x0
-; CHECK-NEXT:    mov x20, x1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT:    bl __fixdfti
-; CHECK-NEXT:    fmov d0, x0
-; CHECK-NEXT:    mov x2, x19
-; CHECK-NEXT:    mov x3, x20
-; CHECK-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
-; CHECK-NEXT:    mov v0.d[1], x1
-; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    add sp, sp, #48
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fptos_v2f64_v2i128:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    sub sp, sp, #48
+; CHECK-SD-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-SD-NEXT:    stp x20, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    .cfi_d...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/95528


More information about the llvm-commits mailing list