[llvm] 2da7057 - [AArch64][GISel] Scalarize fp128 fadd/fsub/fmul/etc.
David Green via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 20 02:40:26 PDT 2024
Author: David Green
Date: 2024-09-20T10:40:22+01:00
New Revision: 2da70572a2564c4250534efd9399a751cde1e962
URL: https://github.com/llvm/llvm-project/commit/2da70572a2564c4250534efd9399a751cde1e962
DIFF: https://github.com/llvm/llvm-project/commit/2da70572a2564c4250534efd9399a751cde1e962.diff
LOG: [AArch64][GISel] Scalarize fp128 fadd/fsub/fmul/etc.
Like other fp128/i128 vector operations, we scalarize these so that the
resulting scalar operations can be lowered to libcalls.
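For readers less familiar with the GlobalISel legalizer: the rule set for
these opcodes already lowers a scalar s128 operation to a runtime call via
libcallFor({s128}); the new rule splits a vector whose scalar or element
type is wider than 64 bits (such as <2 x s128>) into its scalar parts, so
each part can then take that existing libcall path. A condensed sketch of
the resulting rule chain (illustrative only; the real builder in
AArch64LegalizerInfo.cpp covers more opcodes and rules, as the hunk below
shows):

  // Sketch, not the exact upstream code. Vector fp128 ops are split into
  // scalar s128 ops, which the libcall rule then turns into calls such as
  // __addtf3 / __subtf3 / __multf3 / __divtf3.
  getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV})
      .legalFor({s32, s64, v2s32, v4s32, v2s64})
      .libcallFor({s128})                          // scalar fp128 -> libcall
      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0) // <2 x s128> -> 2 x s128
      .clampNumElements(0, v2s32, v4s32);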
Added:
Modified:
llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
llvm/test/CodeGen/AArch64/arm64-fp128.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 77a0dabb0183dd..a76cf24739e32a 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -239,7 +239,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
{G_SADDE, G_SSUBE, G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO, G_USUBO})
.legalFor({{s32, s32}, {s64, s32}})
.clampScalar(0, s32, s64)
- .clampScalar(1, s32, s64)
+ .clampScalar(1, s32, s64)
.widenScalarToNextPow2(0);
getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FNEG,
@@ -253,6 +253,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
return (Ty == v8s16 || Ty == v4s16) && HasFP16;
})
.libcallFor({s128})
+ .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
.minScalarOrElt(0, MinFPScalar)
.clampNumElements(0, v4s16, v8s16)
.clampNumElements(0, v2s32, v4s32)
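Note the rule ordering above: libcallFor({s128}) is consulted before the new
scalarizeIf rule, so a scalar s128 operation still takes the libcall path
directly, while a <2 x s128> operation (which matches no earlier rule) is
scalarized first and each s128 half is then libcalled on the next
legalization round. The regenerated CHECK-GI lines below reflect this: each
vector test now emits two bl calls to the corresponding soft-float routine
(__addtf3, __subtf3, __multf3, __divtf3), matching the SelectionDAG output
instead of falling back.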
diff --git a/llvm/test/CodeGen/AArch64/arm64-fp128.ll b/llvm/test/CodeGen/AArch64/arm64-fp128.ll
index 5999ad0eaa0ec3..806bb7dd3237c5 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fp128.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fp128.ll
@@ -4,11 +4,6 @@
; CHECK-GI: warning: Instruction selection used fallback path for test_neg_sub
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_neg
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for vec_add
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for vec_sub
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for vec_mul
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for vec_div
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for vec_neg_sub
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for vec_neg
define fp128 @test_add(fp128 %lhs, fp128 %rhs) {
@@ -426,89 +421,161 @@ define fp128 @test_neg(fp128 %in) {
define <2 x fp128> @vec_add(<2 x fp128> %lhs, <2 x fp128> %rhs) {
-; CHECK-LABEL: vec_add:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #64
-; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 64
-; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: stp q1, q3, [sp, #16] // 32-byte Folded Spill
-; CHECK-NEXT: mov v1.16b, v2.16b
-; CHECK-NEXT: bl __addtf3
-; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload
-; CHECK-NEXT: bl __addtf3
-; CHECK-NEXT: mov v1.16b, v0.16b
-; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
-; CHECK-NEXT: add sp, sp, #64
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: vec_add:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sub sp, sp, #64
+; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 64
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: stp q1, q3, [sp, #16] // 32-byte Folded Spill
+; CHECK-SD-NEXT: mov v1.16b, v2.16b
+; CHECK-SD-NEXT: bl __addtf3
+; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload
+; CHECK-SD-NEXT: bl __addtf3
+; CHECK-SD-NEXT: mov v1.16b, v0.16b
+; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
+; CHECK-SD-NEXT: add sp, sp, #64
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: vec_add:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sub sp, sp, #64
+; CHECK-GI-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 64
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: stp q3, q1, [sp, #16] // 32-byte Folded Spill
+; CHECK-GI-NEXT: mov v1.16b, v2.16b
+; CHECK-GI-NEXT: bl __addtf3
+; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT: ldp q1, q0, [sp, #16] // 32-byte Folded Reload
+; CHECK-GI-NEXT: bl __addtf3
+; CHECK-GI-NEXT: mov v1.16b, v0.16b
+; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
+; CHECK-GI-NEXT: add sp, sp, #64
+; CHECK-GI-NEXT: ret
%val = fadd <2 x fp128> %lhs, %rhs
ret <2 x fp128> %val
}
define <2 x fp128> @vec_sub(<2 x fp128> %lhs, <2 x fp128> %rhs) {
-; CHECK-LABEL: vec_sub:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #64
-; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 64
-; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: stp q1, q3, [sp, #16] // 32-byte Folded Spill
-; CHECK-NEXT: mov v1.16b, v2.16b
-; CHECK-NEXT: bl __subtf3
-; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload
-; CHECK-NEXT: bl __subtf3
-; CHECK-NEXT: mov v1.16b, v0.16b
-; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
-; CHECK-NEXT: add sp, sp, #64
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: vec_sub:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sub sp, sp, #64
+; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 64
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: stp q1, q3, [sp, #16] // 32-byte Folded Spill
+; CHECK-SD-NEXT: mov v1.16b, v2.16b
+; CHECK-SD-NEXT: bl __subtf3
+; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload
+; CHECK-SD-NEXT: bl __subtf3
+; CHECK-SD-NEXT: mov v1.16b, v0.16b
+; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
+; CHECK-SD-NEXT: add sp, sp, #64
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: vec_sub:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sub sp, sp, #64
+; CHECK-GI-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 64
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: stp q3, q1, [sp, #16] // 32-byte Folded Spill
+; CHECK-GI-NEXT: mov v1.16b, v2.16b
+; CHECK-GI-NEXT: bl __subtf3
+; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT: ldp q1, q0, [sp, #16] // 32-byte Folded Reload
+; CHECK-GI-NEXT: bl __subtf3
+; CHECK-GI-NEXT: mov v1.16b, v0.16b
+; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
+; CHECK-GI-NEXT: add sp, sp, #64
+; CHECK-GI-NEXT: ret
%val = fsub <2 x fp128> %lhs, %rhs
ret <2 x fp128> %val
}
define <2 x fp128> @vec_mul(<2 x fp128> %lhs, <2 x fp128> %rhs) {
-; CHECK-LABEL: vec_mul:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #64
-; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 64
-; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: stp q1, q3, [sp, #16] // 32-byte Folded Spill
-; CHECK-NEXT: mov v1.16b, v2.16b
-; CHECK-NEXT: bl __multf3
-; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload
-; CHECK-NEXT: bl __multf3
-; CHECK-NEXT: mov v1.16b, v0.16b
-; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
-; CHECK-NEXT: add sp, sp, #64
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: vec_mul:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sub sp, sp, #64
+; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 64
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: stp q1, q3, [sp, #16] // 32-byte Folded Spill
+; CHECK-SD-NEXT: mov v1.16b, v2.16b
+; CHECK-SD-NEXT: bl __multf3
+; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload
+; CHECK-SD-NEXT: bl __multf3
+; CHECK-SD-NEXT: mov v1.16b, v0.16b
+; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
+; CHECK-SD-NEXT: add sp, sp, #64
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: vec_mul:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sub sp, sp, #64
+; CHECK-GI-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 64
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: stp q3, q1, [sp, #16] // 32-byte Folded Spill
+; CHECK-GI-NEXT: mov v1.16b, v2.16b
+; CHECK-GI-NEXT: bl __multf3
+; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT: ldp q1, q0, [sp, #16] // 32-byte Folded Reload
+; CHECK-GI-NEXT: bl __multf3
+; CHECK-GI-NEXT: mov v1.16b, v0.16b
+; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
+; CHECK-GI-NEXT: add sp, sp, #64
+; CHECK-GI-NEXT: ret
%val = fmul <2 x fp128> %lhs, %rhs
ret <2 x fp128> %val
}
define <2 x fp128> @vec_div(<2 x fp128> %lhs, <2 x fp128> %rhs) {
-; CHECK-LABEL: vec_div:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #64
-; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 64
-; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: stp q1, q3, [sp, #16] // 32-byte Folded Spill
-; CHECK-NEXT: mov v1.16b, v2.16b
-; CHECK-NEXT: bl __divtf3
-; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload
-; CHECK-NEXT: bl __divtf3
-; CHECK-NEXT: mov v1.16b, v0.16b
-; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
-; CHECK-NEXT: add sp, sp, #64
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: vec_div:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sub sp, sp, #64
+; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 64
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: stp q1, q3, [sp, #16] // 32-byte Folded Spill
+; CHECK-SD-NEXT: mov v1.16b, v2.16b
+; CHECK-SD-NEXT: bl __divtf3
+; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload
+; CHECK-SD-NEXT: bl __divtf3
+; CHECK-SD-NEXT: mov v1.16b, v0.16b
+; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
+; CHECK-SD-NEXT: add sp, sp, #64
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: vec_div:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sub sp, sp, #64
+; CHECK-GI-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 64
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: stp q3, q1, [sp, #16] // 32-byte Folded Spill
+; CHECK-GI-NEXT: mov v1.16b, v2.16b
+; CHECK-GI-NEXT: bl __divtf3
+; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT: ldp q1, q0, [sp, #16] // 32-byte Folded Reload
+; CHECK-GI-NEXT: bl __divtf3
+; CHECK-GI-NEXT: mov v1.16b, v0.16b
+; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
+; CHECK-GI-NEXT: add sp, sp, #64
+; CHECK-GI-NEXT: ret
%val = fdiv <2 x fp128> %lhs, %rhs
ret <2 x fp128> %val
}
@@ -1381,27 +1448,50 @@ define <2 x fp128> @vec_extend_f64(<2 x double> %val) {
}
define <2 x fp128> @vec_neg_sub(<2 x fp128> %in) {
-; CHECK-LABEL: vec_neg_sub:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #64
-; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 64
-; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: str q1, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT: mov v1.16b, v0.16b
-; CHECK-NEXT: adrp x8, .LCPI47_0
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI47_0]
-; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT: bl __subtf3
-; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT: bl __subtf3
-; CHECK-NEXT: mov v1.16b, v0.16b
-; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
-; CHECK-NEXT: add sp, sp, #64
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: vec_neg_sub:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sub sp, sp, #64
+; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 64
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill
+; CHECK-SD-NEXT: mov v1.16b, v0.16b
+; CHECK-SD-NEXT: adrp x8, .LCPI47_0
+; CHECK-SD-NEXT: ldr q0, [x8, :lo12:.LCPI47_0]
+; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT: bl __subtf3
+; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT: bl __subtf3
+; CHECK-SD-NEXT: mov v1.16b, v0.16b
+; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
+; CHECK-SD-NEXT: add sp, sp, #64
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: vec_neg_sub:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sub sp, sp, #64
+; CHECK-GI-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 64
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: mov v2.16b, v0.16b
+; CHECK-GI-NEXT: adrp x8, .LCPI47_0
+; CHECK-GI-NEXT: str q1, [sp, #32] // 16-byte Folded Spill
+; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI47_0]
+; CHECK-GI-NEXT: mov v1.16b, v2.16b
+; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT: bl __subtf3
+; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT: bl __subtf3
+; CHECK-GI-NEXT: mov v1.16b, v0.16b
+; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
+; CHECK-GI-NEXT: add sp, sp, #64
+; CHECK-GI-NEXT: ret
%ret = fsub <2 x fp128> zeroinitializer, %in
ret <2 x fp128> %ret
}
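To inspect the GlobalISel output for this test locally, something along
these lines should work (assuming a built llc; the test's actual RUN lines,
not shown in this excerpt, may pin a different triple or flags):

  llc -mtriple=aarch64 -global-isel -o - llvm/test/CodeGen/AArch64/arm64-fp128.ll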