[llvm] 3106e97 - [AArch64][GlobalISel] Generate libcall for s128 G_FPTRUNC (#96602)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 28 07:36:30 PDT 2024
Author: Madhur Amilkanthwar
Date: 2024-06-28T20:06:27+05:30
New Revision: 3106e97d4bdd46a38fa5c50578cf4aee62d04bf3
URL: https://github.com/llvm/llvm-project/commit/3106e97d4bdd46a38fa5c50578cf4aee62d04bf3
DIFF: https://github.com/llvm/llvm-project/commit/3106e97d4bdd46a38fa5c50578cf4aee62d04bf3.diff
LOG: [AArch64][GlobalISel] Generate libcall for s128 G_FPTRUNC (#96602)
This patch would generate libcall if the source type is s128. This is
the root cause for many fallbacks in RajaPerf benchmark.
Added:
Modified:
llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
llvm/test/CodeGen/AArch64/fptrunc.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 50ba3057928ee..82d5a56d1ff25 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -648,6 +648,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder(G_FPTRUNC)
.legalFor(
{{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}})
+ .libcallFor({{s16, s128}, {s32, s128}, {s64, s128}})
.clampNumElements(0, v4s16, v4s16)
.clampNumElements(0, v2s32, v2s32)
.scalarize(0);
diff --git a/llvm/test/CodeGen/AArch64/fptrunc.ll b/llvm/test/CodeGen/AArch64/fptrunc.ll
index 9d0672d1c95ea..aec5d7959226c 100644
--- a/llvm/test/CodeGen/AArch64/fptrunc.ll
+++ b/llvm/test/CodeGen/AArch64/fptrunc.ll
@@ -32,6 +32,208 @@ entry:
ret half %c
}
+
+define half @fptrunc_f128_f16(fp128 %a) {
+; CHECK-SD-LABEL: fptrunc_f128_f16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: bl __trunctfhf2
+; CHECK-SD-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fptrunc_f128_f16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: b __trunctfhf2
+entry:
+ %c = fptrunc fp128 %a to half
+ ret half %c
+}
+
+define float @fptrunc_f128_f32(fp128 %a) {
+; CHECK-SD-LABEL: fptrunc_f128_f32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: bl __trunctfsf2
+; CHECK-SD-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fptrunc_f128_f32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: b __trunctfsf2
+entry:
+ %c = fptrunc fp128 %a to float
+ ret float %c
+}
+
+define double @fptrunc_f128_f64(fp128 %a) {
+; CHECK-SD-LABEL: fptrunc_f128_f64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: bl __trunctfdf2
+; CHECK-SD-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fptrunc_f128_f64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: b __trunctfdf2
+entry:
+ %c = fptrunc fp128 %a to double
+ ret double %c
+}
+
+define <2 x half> @fptrunc_v2f128_v2f16(<2 x fp128> %a) {
+; CHECK-SD-LABEL: fptrunc_v2f128_v2f16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: sub sp, sp, #48
+; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 48
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT: mov v0.16b, v1.16b
+; CHECK-SD-NEXT: bl __trunctfhf2
+; CHECK-SD-NEXT: // kill: def $h0 killed $h0 def $q0
+; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT: bl __trunctfhf2
+; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT: // kill: def $h0 killed $h0 def $q0
+; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-SD-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: add sp, sp, #48
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fptrunc_v2f128_v2f16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: sub sp, sp, #64
+; CHECK-GI-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 64
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: mov v2.d[0], x8
+; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT: mov v2.d[1], x8
+; CHECK-GI-NEXT: str q2, [sp, #32] // 16-byte Folded Spill
+; CHECK-GI-NEXT: bl __trunctfhf2
+; CHECK-GI-NEXT: // kill: def $h0 killed $h0 def $q0
+; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT: bl __trunctfhf2
+; CHECK-GI-NEXT: // kill: def $h0 killed $h0 def $q0
+; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT: bl __trunctfhf2
+; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT: bl __trunctfhf2
+; CHECK-GI-NEXT: ldp q1, q0, [sp] // 32-byte Folded Reload
+; CHECK-GI-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
+; CHECK-GI-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: add sp, sp, #64
+; CHECK-GI-NEXT: ret
+entry:
+ %c = fptrunc <2 x fp128> %a to <2 x half>
+ ret <2 x half> %c
+}
+
+
+define <2 x float> @fptrunc_v2f128_v2f32(<2 x fp128> %a) {
+; CHECK-SD-LABEL: fptrunc_v2f128_v2f32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: sub sp, sp, #48
+; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 48
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT: mov v0.16b, v1.16b
+; CHECK-SD-NEXT: bl __trunctfsf2
+; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT: bl __trunctfsf2
+; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-SD-NEXT: mov v0.s[1], v1.s[0]
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: add sp, sp, #48
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fptrunc_v2f128_v2f32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: sub sp, sp, #48
+; CHECK-GI-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 48
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT: bl __trunctfsf2
+; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT: bl __trunctfsf2
+; CHECK-GI-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-GI-NEXT: mov v1.s[1], v0.s[0]
+; CHECK-GI-NEXT: fmov d0, d1
+; CHECK-GI-NEXT: add sp, sp, #48
+; CHECK-GI-NEXT: ret
+entry:
+ %c = fptrunc <2 x fp128> %a to <2 x float>
+ ret <2 x float> %c
+}
+
+
+define <2 x double> @fptrunc_v2f128_v2f64(<2 x fp128> %a) {
+; CHECK-SD-LABEL: fptrunc_v2f128_v2f64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: sub sp, sp, #48
+; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 48
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT: mov v0.16b, v1.16b
+; CHECK-SD-NEXT: bl __trunctfdf2
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT: bl __trunctfdf2
+; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT: add sp, sp, #48
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fptrunc_v2f128_v2f64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: sub sp, sp, #48
+; CHECK-GI-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 48
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT: bl __trunctfdf2
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT: bl __trunctfdf2
+; CHECK-GI-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-GI-NEXT: mov v1.d[1], v0.d[0]
+; CHECK-GI-NEXT: mov v0.16b, v1.16b
+; CHECK-GI-NEXT: add sp, sp, #48
+; CHECK-GI-NEXT: ret
+entry:
+ %c = fptrunc <2 x fp128> %a to <2 x double>
+ ret <2 x double> %c
+}
+
define <2 x float> @fptrunc_v2f64_v2f32(<2 x double> %a) {
; CHECK-LABEL: fptrunc_v2f64_v2f32:
; CHECK: // %bb.0: // %entry
More information about the llvm-commits
mailing list