[llvm] eaeb1fb - [AArch64] Cleanup and extend arm64-fp128.ll. NFC
David Green via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 20 02:12:44 PDT 2024
Author: David Green
Date: 2024-09-20T10:12:39+01:00
New Revision: eaeb1fbcc05e15728f5fc96f08da3bcd24b0fc73
URL: https://github.com/llvm/llvm-project/commit/eaeb1fbcc05e15728f5fc96f08da3bcd24b0fc73
DIFF: https://github.com/llvm/llvm-project/commit/eaeb1fbcc05e15728f5fc96f08da3bcd24b0fc73.diff
LOG: [AArch64] Cleanup and extend arm64-fp128.ll. NFC
This rewrites the existing tests that load from globals to a more modern style,
and adds vector and GIsel test coverage.
Added:
Modified:
llvm/test/CodeGen/AArch64/arm64-fp128.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/AArch64/arm64-fp128.ll b/llvm/test/CodeGen/AArch64/arm64-fp128.ll
index 61e64e219355fc..5999ad0eaa0ec3 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fp128.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fp128.ll
@@ -1,436 +1,1424 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=arm64-linux-gnu -verify-machineinstrs -mcpu=cyclone -aarch64-enable-atomic-cfg-tidy=0 < %s | FileCheck -enable-var-scope %s
-
-@lhs = dso_local global fp128 zeroinitializer, align 16
-@rhs = dso_local global fp128 zeroinitializer, align 16
-
-define fp128 @test_add() {
+; RUN: llc -mtriple=arm64-linux-gnu -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=arm64-linux-gnu -verify-machineinstrs -global-isel -global-isel-abort=2 < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+
+; CHECK-GI: warning: Instruction selection used fallback path for test_neg_sub
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_neg
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for vec_add
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for vec_sub
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for vec_mul
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for vec_div
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for vec_neg_sub
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for vec_neg
+
+define fp128 @test_add(fp128 %lhs, fp128 %rhs) {
; CHECK-LABEL: test_add:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, lhs
-; CHECK-NEXT: ldr q0, [x8, :lo12:lhs]
-; CHECK-NEXT: adrp x8, rhs
-; CHECK-NEXT: ldr q1, [x8, :lo12:rhs]
; CHECK-NEXT: b __addtf3
-
- %lhs = load fp128, ptr @lhs, align 16
- %rhs = load fp128, ptr @rhs, align 16
-
%val = fadd fp128 %lhs, %rhs
ret fp128 %val
}
-define fp128 @test_sub() {
+define fp128 @test_sub(fp128 %lhs, fp128 %rhs) {
; CHECK-LABEL: test_sub:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, lhs
-; CHECK-NEXT: ldr q0, [x8, :lo12:lhs]
-; CHECK-NEXT: adrp x8, rhs
-; CHECK-NEXT: ldr q1, [x8, :lo12:rhs]
; CHECK-NEXT: b __subtf3
-
- %lhs = load fp128, ptr @lhs, align 16
- %rhs = load fp128, ptr @rhs, align 16
-
%val = fsub fp128 %lhs, %rhs
ret fp128 %val
}
-define fp128 @test_mul() {
+define fp128 @test_mul(fp128 %lhs, fp128 %rhs) {
; CHECK-LABEL: test_mul:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, lhs
-; CHECK-NEXT: ldr q0, [x8, :lo12:lhs]
-; CHECK-NEXT: adrp x8, rhs
-; CHECK-NEXT: ldr q1, [x8, :lo12:rhs]
; CHECK-NEXT: b __multf3
-
- %lhs = load fp128, ptr @lhs, align 16
- %rhs = load fp128, ptr @rhs, align 16
-
%val = fmul fp128 %lhs, %rhs
ret fp128 %val
}
-define fp128 @test_div() {
+define fp128 @test_div(fp128 %lhs, fp128 %rhs) {
; CHECK-LABEL: test_div:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, lhs
-; CHECK-NEXT: ldr q0, [x8, :lo12:lhs]
-; CHECK-NEXT: adrp x8, rhs
-; CHECK-NEXT: ldr q1, [x8, :lo12:rhs]
; CHECK-NEXT: b __divtf3
-
- %lhs = load fp128, ptr @lhs, align 16
- %rhs = load fp128, ptr @rhs, align 16
-
%val = fdiv fp128 %lhs, %rhs
ret fp128 %val
}
-@var32 = dso_local global i32 0
-@var64 = dso_local global i64 0
-
-define dso_local void @test_fptosi() {
-; CHECK-LABEL: test_fptosi:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #32
-; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: adrp x8, lhs
-; CHECK-NEXT: ldr q0, [x8, :lo12:lhs]
-; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT: bl __fixtfsi
-; CHECK-NEXT: adrp x8, var32
-; CHECK-NEXT: str w0, [x8, :lo12:var32]
-; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT: bl __fixtfdi
-; CHECK-NEXT: adrp x8, var64
-; CHECK-NEXT: str x0, [x8, :lo12:var64]
-; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
-; CHECK-NEXT: add sp, sp, #32
-; CHECK-NEXT: ret
- %val = load fp128, ptr @lhs, align 16
-
+define i32 @test_fptosi_32(fp128 %val) {
+; CHECK-SD-LABEL: test_fptosi_32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: bl __fixtfsi
+; CHECK-SD-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_fptosi_32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: b __fixtfsi
%val32 = fptosi fp128 %val to i32
- store i32 %val32, ptr @var32
+ ret i32 %val32
+}
+define i64 @test_fptosi_64(fp128 %val) {
+; CHECK-SD-LABEL: test_fptosi_64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: bl __fixtfdi
+; CHECK-SD-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_fptosi_64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: b __fixtfdi
%val64 = fptosi fp128 %val to i64
- store i64 %val64, ptr @var64
-
- ret void
+ ret i64 %val64
}
-define dso_local void @test_fptoui() {
-; CHECK-LABEL: test_fptoui:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #32
-; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: adrp x8, lhs
-; CHECK-NEXT: ldr q0, [x8, :lo12:lhs]
-; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT: bl __fixunstfsi
-; CHECK-NEXT: adrp x8, var32
-; CHECK-NEXT: str w0, [x8, :lo12:var32]
-; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT: bl __fixunstfdi
-; CHECK-NEXT: adrp x8, var64
-; CHECK-NEXT: str x0, [x8, :lo12:var64]
-; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
-; CHECK-NEXT: add sp, sp, #32
-; CHECK-NEXT: ret
- %val = load fp128, ptr @lhs, align 16
-
+define i32 @test_fptoui_32(fp128 %val) {
+; CHECK-SD-LABEL: test_fptoui_32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: bl __fixunstfsi
+; CHECK-SD-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_fptoui_32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: b __fixunstfsi
%val32 = fptoui fp128 %val to i32
- store i32 %val32, ptr @var32
+ ret i32 %val32
+}
+define i64 @test_fptoui_64(fp128 %val) {
+; CHECK-SD-LABEL: test_fptoui_64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: bl __fixunstfdi
+; CHECK-SD-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_fptoui_64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: b __fixunstfdi
%val64 = fptoui fp128 %val to i64
- store i64 %val64, ptr @var64
-
- ret void
+ ret i64 %val64
}
-define dso_local void @test_sitofp() {
-; CHECK-LABEL: test_sitofp:
-; CHECK: // %bb.0:
-; CHECK-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: .cfi_offset w19, -8
-; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: adrp x8, var32
-; CHECK-NEXT: ldr w0, [x8, :lo12:var32]
-; CHECK-NEXT: bl __floatsitf
-; CHECK-NEXT: adrp x19, lhs
-; CHECK-NEXT: str q0, [x19, :lo12:lhs]
-; CHECK-NEXT: adrp x8, var64
-; CHECK-NEXT: ldr x0, [x8, :lo12:var64]
-; CHECK-NEXT: bl __floatditf
-; CHECK-NEXT: str q0, [x19, :lo12:lhs]
-; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
-; CHECK-NEXT: ret
-
- %src32 = load i32, ptr @var32
+define fp128 @test_sitofp_32(i32 %src32) {
+; CHECK-SD-LABEL: test_sitofp_32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: bl __floatsitf
+; CHECK-SD-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_sitofp_32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: b __floatsitf
%val32 = sitofp i32 %src32 to fp128
- store volatile fp128 %val32, ptr @lhs
+ ret fp128 %val32
+}
- %src64 = load i64, ptr @var64
+define fp128 @test_sitofp_64(i64 %src64) {
+; CHECK-SD-LABEL: test_sitofp_64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: bl __floatditf
+; CHECK-SD-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_sitofp_64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: b __floatditf
%val64 = sitofp i64 %src64 to fp128
- store volatile fp128 %val64, ptr @lhs
-
- ret void
+ ret fp128 %val64
}
-define dso_local void @test_uitofp() {
-; CHECK-LABEL: test_uitofp:
-; CHECK: // %bb.0:
-; CHECK-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: .cfi_offset w19, -8
-; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: adrp x8, var32
-; CHECK-NEXT: ldr w0, [x8, :lo12:var32]
-; CHECK-NEXT: bl __floatunsitf
-; CHECK-NEXT: adrp x19, lhs
-; CHECK-NEXT: str q0, [x19, :lo12:lhs]
-; CHECK-NEXT: adrp x8, var64
-; CHECK-NEXT: ldr x0, [x8, :lo12:var64]
-; CHECK-NEXT: bl __floatunditf
-; CHECK-NEXT: str q0, [x19, :lo12:lhs]
-; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
-; CHECK-NEXT: ret
-
- %src32 = load i32, ptr @var32
+define fp128 @test_uitofp_32(i32 %src32) {
+; CHECK-SD-LABEL: test_uitofp_32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: bl __floatunsitf
+; CHECK-SD-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_uitofp_32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: b __floatunsitf
%val32 = uitofp i32 %src32 to fp128
- store volatile fp128 %val32, ptr @lhs
+ ret fp128 %val32
+}
- %src64 = load i64, ptr @var64
+define fp128 @test_uitofp_64(i64 %src64) {
+; CHECK-SD-LABEL: test_uitofp_64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: bl __floatunditf
+; CHECK-SD-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_uitofp_64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: b __floatunditf
%val64 = uitofp i64 %src64 to fp128
- store volatile fp128 %val64, ptr @lhs
-
- ret void
+ ret fp128 %val64
}
-define dso_local i1 @test_setcc1() {
+; Technically, everything after the call to __letf2 is redundant, but we'll let
+; LLVM have its fun for now.
+define i1 @test_setcc1(fp128 %lhs, fp128 %rhs) {
; CHECK-LABEL: test_setcc1:
; CHECK: // %bb.0:
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: adrp x8, lhs
-; CHECK-NEXT: ldr q0, [x8, :lo12:lhs]
-; CHECK-NEXT: adrp x8, rhs
-; CHECK-NEXT: ldr q1, [x8, :lo12:rhs]
; CHECK-NEXT: bl __letf2
; CHECK-NEXT: cmp w0, #0
; CHECK-NEXT: cset w0, le
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
-
- %lhs = load fp128, ptr @lhs, align 16
- %rhs = load fp128, ptr @rhs, align 16
-
-; Technically, everything after the call to __letf2 is redundant, but we'll let
-; LLVM have its fun for now.
%val = fcmp ole fp128 %lhs, %rhs
-
ret i1 %val
}
-define dso_local i1 @test_setcc2() {
+define i1 @test_setcc2(fp128 %lhs, fp128 %rhs) {
; CHECK-LABEL: test_setcc2:
; CHECK: // %bb.0:
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: adrp x8, lhs
-; CHECK-NEXT: ldr q0, [x8, :lo12:lhs]
-; CHECK-NEXT: adrp x8, rhs
-; CHECK-NEXT: ldr q1, [x8, :lo12:rhs]
; CHECK-NEXT: bl __letf2
; CHECK-NEXT: cmp w0, #0
; CHECK-NEXT: cset w0, gt
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
-
- %lhs = load fp128, ptr @lhs, align 16
- %rhs = load fp128, ptr @rhs, align 16
-
%val = fcmp ugt fp128 %lhs, %rhs
+ ret i1 %val
+}
+define i1 @test_setcc3(fp128 %lhs, fp128 %rhs) {
+; CHECK-SD-LABEL: test_setcc3:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sub sp, sp, #48
+; CHECK-SD-NEXT: stp x30, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 48
+; CHECK-SD-NEXT: .cfi_offset w19, -8
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill
+; CHECK-SD-NEXT: bl __eqtf2
+; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload
+; CHECK-SD-NEXT: mov w19, w0
+; CHECK-SD-NEXT: bl __unordtf2
+; CHECK-SD-NEXT: cmp w0, #0
+; CHECK-SD-NEXT: ccmp w19, #0, #4, eq
+; CHECK-SD-NEXT: ldp x30, x19, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: add sp, sp, #48
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_setcc3:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sub sp, sp, #48
+; CHECK-GI-NEXT: stp x30, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 48
+; CHECK-GI-NEXT: .cfi_offset w19, -8
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: stp q1, q0, [sp] // 32-byte Folded Spill
+; CHECK-GI-NEXT: bl __eqtf2
+; CHECK-GI-NEXT: ldp q1, q0, [sp] // 32-byte Folded Reload
+; CHECK-GI-NEXT: cmp w0, #0
+; CHECK-GI-NEXT: cset w19, eq
+; CHECK-GI-NEXT: bl __unordtf2
+; CHECK-GI-NEXT: cmp w0, #0
+; CHECK-GI-NEXT: cset w8, ne
+; CHECK-GI-NEXT: orr w0, w19, w8
+; CHECK-GI-NEXT: ldp x30, x19, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT: add sp, sp, #48
+; CHECK-GI-NEXT: ret
+ %val = fcmp ueq fp128 %lhs, %rhs
ret i1 %val
}
-define dso_local i1 @test_setcc3() {
-; CHECK-LABEL: test_setcc3:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #48
-; CHECK-NEXT: stp x30, x19, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 48
-; CHECK-NEXT: .cfi_offset w19, -8
-; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: adrp x8, lhs
-; CHECK-NEXT: ldr q0, [x8, :lo12:lhs]
-; CHECK-NEXT: adrp x8, rhs
-; CHECK-NEXT: ldr q1, [x8, :lo12:rhs]
-; CHECK-NEXT: stp q1, q0, [sp] // 32-byte Folded Spill
-; CHECK-NEXT: bl __eqtf2
-; CHECK-NEXT: mov x19, x0
-; CHECK-NEXT: ldp q1, q0, [sp] // 32-byte Folded Reload
-; CHECK-NEXT: bl __unordtf2
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: ccmp w19, #0, #4, eq
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ldp x30, x19, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT: add sp, sp, #48
-; CHECK-NEXT: ret
+; olt == !uge, which LLVM optimizes this to.
+define i32 @test_br_cc(fp128 %lhs, fp128 %rhs) {
+; CHECK-SD-LABEL: test_br_cc:
+; CHECK-SD: // %bb.0: // %common.ret
+; CHECK-SD-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: bl __lttf2
+; CHECK-SD-NEXT: mov w8, #29 // =0x1d
+; CHECK-SD-NEXT: cmp w0, #0
+; CHECK-SD-NEXT: mov w9, #42 // =0x2a
+; CHECK-SD-NEXT: csel w0, w9, w8, lt
+; CHECK-SD-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_br_cc:
+; CHECK-GI: // %bb.0: // %common.ret
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: bl __lttf2
+; CHECK-GI-NEXT: mov w8, #29 // =0x1d
+; CHECK-GI-NEXT: mov w9, #42 // =0x2a
+; CHECK-GI-NEXT: cmp w0, #0
+; CHECK-GI-NEXT: csel w0, w9, w8, lt
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %cond = fcmp olt fp128 %lhs, %rhs
+ br i1 %cond, label %iftrue, label %iffalse
- %lhs = load fp128, ptr @lhs, align 16
- %rhs = load fp128, ptr @rhs, align 16
+iftrue:
+ ret i32 42
+iffalse:
+ ret i32 29
+}
- %val = fcmp ueq fp128 %lhs, %rhs
+define fp128 @test_select(i1 %cond, fp128 %lhs, fp128 %rhs) {
+; CHECK-SD-LABEL: test_select:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: tst w0, #0x1
+; CHECK-SD-NEXT: b.eq .LBB16_2
+; CHECK-SD-NEXT: // %bb.1:
+; CHECK-SD-NEXT: mov v1.16b, v0.16b
+; CHECK-SD-NEXT: .LBB16_2:
+; CHECK-SD-NEXT: mov v0.16b, v1.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_select:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: and w8, w0, #0x1
+; CHECK-GI-NEXT: mov d2, v0.d[1]
+; CHECK-GI-NEXT: mov d3, v1.d[1]
+; CHECK-GI-NEXT: tst w8, #0x1
+; CHECK-GI-NEXT: fcsel d0, d0, d1, ne
+; CHECK-GI-NEXT: fcsel d1, d2, d3, ne
+; CHECK-GI-NEXT: fmov x8, d0
+; CHECK-GI-NEXT: mov v0.d[0], x8
+; CHECK-GI-NEXT: fmov x8, d1
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: ret
+ %val = select i1 %cond, fp128 %lhs, fp128 %rhs
+ ret fp128 %val
+}
- ret i1 %val
+define half @test_round_f16(fp128 %val) {
+; CHECK-SD-LABEL: test_round_f16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: bl __trunctfhf2
+; CHECK-SD-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_round_f16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: b __trunctfhf2
+ %dst = fptrunc fp128 %val to half
+ ret half %dst
}
+define float @test_round_f32(fp128 %val) {
+; CHECK-SD-LABEL: test_round_f32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: bl __trunctfsf2
+; CHECK-SD-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_round_f32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: b __trunctfsf2
+ %dst = fptrunc fp128 %val to float
+ ret float %dst
+}
+
+define double @test_round_f64(fp128 %val) {
+; CHECK-SD-LABEL: test_round_f64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: bl __trunctfdf2
+; CHECK-SD-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_round_f64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: b __trunctfdf2
+ %dst = fptrunc fp128 %val to double
+ ret double %dst
+}
-define dso_local i32 @test_br_cc() uwtable {
-; CHECK-LABEL: test_br_cc:
+define fp128 @test_extend_f16(half %val) {
+; CHECK-LABEL: test_extend_f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: b __extendhftf2
+ %dst = fpext half %val to fp128
+ ret fp128 %dst
+}
+
+define fp128 @test_extend_f32(float %val) {
+; CHECK-LABEL: test_extend_f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: b __extendsftf2
+ %dst = fpext float %val to fp128
+ ret fp128 %dst
+}
+
+define fp128 @test_extend_f64(double %val) {
+; CHECK-LABEL: test_extend_f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: b __extenddftf2
+ %dst = fpext double %val to fp128
+ ret fp128 %dst
+}
+
+;; We convert this to fneg, and target-independent code expands it with
+;; integer operations.
+define fp128 @test_neg_sub(fp128 %in) {
+; CHECK-LABEL: test_neg_sub:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str q0, [sp, #-16]!
; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: .cfi_remember_state
-; CHECK-NEXT: adrp x8, lhs
-; CHECK-NEXT: ldr q0, [x8, :lo12:lhs]
-; CHECK-NEXT: adrp x8, rhs
-; CHECK-NEXT: ldr q1, [x8, :lo12:rhs]
-; CHECK-NEXT: bl __lttf2
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: b.ge .LBB11_2
-; CHECK-NEXT: // %bb.1: // %iftrue
-; CHECK-NEXT: mov w0, #42
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: .cfi_def_cfa_offset 0
-; CHECK-NEXT: .cfi_restore w30
+; CHECK-NEXT: ldrb w8, [sp, #15]
+; CHECK-NEXT: eor w8, w8, #0x80
+; CHECK-NEXT: strb w8, [sp, #15]
+; CHECK-NEXT: ldr q0, [sp], #16
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB11_2: // %iffalse
-; CHECK-NEXT: .cfi_restore_state
-; CHECK-NEXT: mov w0, #29
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: .cfi_def_cfa_offset 0
-; CHECK-NEXT: .cfi_restore w30
+ %ret = fsub fp128 0xL00000000000000008000000000000000, %in
+ ret fp128 %ret
+}
+
+define fp128 @test_neg(fp128 %in) {
+; CHECK-LABEL: test_neg:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str q0, [sp, #-16]!
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: ldrb w8, [sp, #15]
+; CHECK-NEXT: eor w8, w8, #0x80
+; CHECK-NEXT: strb w8, [sp, #15]
+; CHECK-NEXT: ldr q0, [sp], #16
; CHECK-NEXT: ret
+ %ret = fneg fp128 %in
+ ret fp128 %ret
+}
- %lhs = load fp128, ptr @lhs, align 16
- %rhs = load fp128, ptr @rhs, align 16
- ; olt == !uge, which LLVM optimizes this to.
- %cond = fcmp olt fp128 %lhs, %rhs
- br i1 %cond, label %iftrue, label %iffalse
-iftrue:
- ret i32 42
-iffalse:
- ret i32 29
+define <2 x fp128> @vec_add(<2 x fp128> %lhs, <2 x fp128> %rhs) {
+; CHECK-LABEL: vec_add:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #64
+; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: stp q1, q3, [sp, #16] // 32-byte Folded Spill
+; CHECK-NEXT: mov v1.16b, v2.16b
+; CHECK-NEXT: bl __addtf3
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload
+; CHECK-NEXT: bl __addtf3
+; CHECK-NEXT: mov v1.16b, v0.16b
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #64
+; CHECK-NEXT: ret
+ %val = fadd <2 x fp128> %lhs, %rhs
+ ret <2 x fp128> %val
}
-define dso_local void @test_select(i1 %cond, fp128 %lhs, fp128 %rhs) {
-; CHECK-LABEL: test_select:
+define <2 x fp128> @vec_sub(<2 x fp128> %lhs, <2 x fp128> %rhs) {
+; CHECK-LABEL: vec_sub:
; CHECK: // %bb.0:
-; CHECK-NEXT: tst w0, #0x1
-; CHECK-NEXT: b.eq .LBB12_2
-; CHECK-NEXT: // %bb.1:
+; CHECK-NEXT: sub sp, sp, #64
+; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: stp q1, q3, [sp, #16] // 32-byte Folded Spill
+; CHECK-NEXT: mov v1.16b, v2.16b
+; CHECK-NEXT: bl __subtf3
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload
+; CHECK-NEXT: bl __subtf3
; CHECK-NEXT: mov v1.16b, v0.16b
-; CHECK-NEXT: .LBB12_2:
-; CHECK-NEXT: adrp x8, lhs
-; CHECK-NEXT: str q1, [x8, :lo12:lhs]
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #64
; CHECK-NEXT: ret
-
- %val = select i1 %cond, fp128 %lhs, fp128 %rhs
- store fp128 %val, ptr @lhs, align 16
- ret void
+ %val = fsub <2 x fp128> %lhs, %rhs
+ ret <2 x fp128> %val
}
-@varhalf = dso_local global half 0.0, align 2
-@varfloat = dso_local global float 0.0, align 4
-@vardouble = dso_local global double 0.0, align 8
-
-define dso_local void @test_round() {
-; CHECK-LABEL: test_round:
+define <2 x fp128> @vec_mul(<2 x fp128> %lhs, <2 x fp128> %rhs) {
+; CHECK-LABEL: vec_mul:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #32
-; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: sub sp, sp, #64
+; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 64
; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: adrp x8, lhs
-; CHECK-NEXT: ldr q0, [x8, :lo12:lhs]
+; CHECK-NEXT: stp q1, q3, [sp, #16] // 32-byte Folded Spill
+; CHECK-NEXT: mov v1.16b, v2.16b
+; CHECK-NEXT: bl __multf3
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT: bl __trunctfhf2
-; CHECK-NEXT: adrp x8, varhalf
-; CHECK-NEXT: str h0, [x8, :lo12:varhalf]
+; CHECK-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload
+; CHECK-NEXT: bl __multf3
+; CHECK-NEXT: mov v1.16b, v0.16b
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT: bl __trunctfsf2
-; CHECK-NEXT: adrp x8, varfloat
-; CHECK-NEXT: str s0, [x8, :lo12:varfloat]
+; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #64
+; CHECK-NEXT: ret
+ %val = fmul <2 x fp128> %lhs, %rhs
+ ret <2 x fp128> %val
+}
+
+define <2 x fp128> @vec_div(<2 x fp128> %lhs, <2 x fp128> %rhs) {
+; CHECK-LABEL: vec_div:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #64
+; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: stp q1, q3, [sp, #16] // 32-byte Folded Spill
+; CHECK-NEXT: mov v1.16b, v2.16b
+; CHECK-NEXT: bl __divtf3
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload
+; CHECK-NEXT: bl __divtf3
+; CHECK-NEXT: mov v1.16b, v0.16b
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT: bl __trunctfdf2
-; CHECK-NEXT: adrp x8, vardouble
-; CHECK-NEXT: str d0, [x8, :lo12:vardouble]
-; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
-; CHECK-NEXT: add sp, sp, #32
+; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #64
; CHECK-NEXT: ret
+ %val = fdiv <2 x fp128> %lhs, %rhs
+ ret <2 x fp128> %val
+}
- %val = load fp128, ptr @lhs, align 16
+define <2 x i32> @vec_fptosi_32(<2 x fp128> %val) {
+; CHECK-SD-LABEL: vec_fptosi_32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sub sp, sp, #48
+; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 48
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT: bl __fixtfsi
+; CHECK-SD-NEXT: fmov s0, w0
+; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT: bl __fixtfsi
+; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-SD-NEXT: mov v0.s[1], w0
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: add sp, sp, #48
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: vec_fptosi_32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sub sp, sp, #32
+; CHECK-GI-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 32
+; CHECK-GI-NEXT: .cfi_offset w19, -8
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT: bl __fixtfsi
+; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT: mov w19, w0
+; CHECK-GI-NEXT: bl __fixtfsi
+; CHECK-GI-NEXT: mov v0.s[0], w19
+; CHECK-GI-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT: mov v0.s[1], w0
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: add sp, sp, #32
+; CHECK-GI-NEXT: ret
+ %val32 = fptosi <2 x fp128> %val to <2 x i32>
+ ret <2 x i32> %val32
+}
- %half = fptrunc fp128 %val to half
- store half %half, ptr @varhalf, align 2
+define <2 x i64> @vec_fptosi_64(<2 x fp128> %val) {
+; CHECK-SD-LABEL: vec_fptosi_64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sub sp, sp, #48
+; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 48
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT: mov v0.16b, v1.16b
+; CHECK-SD-NEXT: bl __fixtfdi
+; CHECK-SD-NEXT: fmov d0, x0
+; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT: bl __fixtfdi
+; CHECK-SD-NEXT: fmov d0, x0
+; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT: add sp, sp, #48
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: vec_fptosi_64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sub sp, sp, #32
+; CHECK-GI-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 32
+; CHECK-GI-NEXT: .cfi_offset w19, -8
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT: bl __fixtfdi
+; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT: mov x19, x0
+; CHECK-GI-NEXT: bl __fixtfdi
+; CHECK-GI-NEXT: mov v0.d[0], x19
+; CHECK-GI-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT: mov v0.d[1], x0
+; CHECK-GI-NEXT: add sp, sp, #32
+; CHECK-GI-NEXT: ret
+ %val64 = fptosi <2 x fp128> %val to <2 x i64>
+ ret <2 x i64> %val64
+}
- %float = fptrunc fp128 %val to float
- store float %float, ptr @varfloat, align 4
+define <2 x i32> @vec_fptoui_32(<2 x fp128> %val) {
+; CHECK-SD-LABEL: vec_fptoui_32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sub sp, sp, #48
+; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 48
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT: bl __fixunstfsi
+; CHECK-SD-NEXT: fmov s0, w0
+; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT: bl __fixunstfsi
+; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-SD-NEXT: mov v0.s[1], w0
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: add sp, sp, #48
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: vec_fptoui_32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sub sp, sp, #32
+; CHECK-GI-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 32
+; CHECK-GI-NEXT: .cfi_offset w19, -8
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT: bl __fixunstfsi
+; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT: mov w19, w0
+; CHECK-GI-NEXT: bl __fixunstfsi
+; CHECK-GI-NEXT: mov v0.s[0], w19
+; CHECK-GI-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT: mov v0.s[1], w0
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: add sp, sp, #32
+; CHECK-GI-NEXT: ret
+ %val32 = fptoui <2 x fp128> %val to <2 x i32>
+ ret <2 x i32> %val32
+}
- %double = fptrunc fp128 %val to double
- store double %double, ptr @vardouble, align 8
+define <2 x i64> @vec_fptoui_64(<2 x fp128> %val) {
+; CHECK-SD-LABEL: vec_fptoui_64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sub sp, sp, #48
+; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 48
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT: mov v0.16b, v1.16b
+; CHECK-SD-NEXT: bl __fixunstfdi
+; CHECK-SD-NEXT: fmov d0, x0
+; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT: bl __fixunstfdi
+; CHECK-SD-NEXT: fmov d0, x0
+; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT: add sp, sp, #48
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: vec_fptoui_64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sub sp, sp, #32
+; CHECK-GI-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 32
+; CHECK-GI-NEXT: .cfi_offset w19, -8
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT: bl __fixunstfdi
+; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT: mov x19, x0
+; CHECK-GI-NEXT: bl __fixunstfdi
+; CHECK-GI-NEXT: mov v0.d[0], x19
+; CHECK-GI-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT: mov v0.d[1], x0
+; CHECK-GI-NEXT: add sp, sp, #32
+; CHECK-GI-NEXT: ret
+ %val64 = fptoui <2 x fp128> %val to <2 x i64>
+ ret <2 x i64> %val64
+}
- ret void
+define <2 x fp128> @vec_sitofp_32(<2 x i32> %src32) {
+; CHECK-SD-LABEL: vec_sitofp_32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sub sp, sp, #32
+; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: fmov w0, s0
+; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT: bl __floatsitf
+; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT: mov w0, v1.s[1]
+; CHECK-SD-NEXT: bl __floatsitf
+; CHECK-SD-NEXT: mov v1.16b, v0.16b
+; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-SD-NEXT: add sp, sp, #32
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: vec_sitofp_32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sub sp, sp, #32
+; CHECK-GI-NEXT: str d8, [sp, #16] // 8-byte Folded Spill
+; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 32
+; CHECK-GI-NEXT: .cfi_offset w30, -8
+; CHECK-GI-NEXT: .cfi_offset b8, -16
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: fmov w0, s0
+; CHECK-GI-NEXT: mov s8, v0.s[1]
+; CHECK-GI-NEXT: bl __floatsitf
+; CHECK-GI-NEXT: fmov w0, s8
+; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT: bl __floatsitf
+; CHECK-GI-NEXT: mov v1.16b, v0.16b
+; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload
+; CHECK-GI-NEXT: ldr d8, [sp, #16] // 8-byte Folded Reload
+; CHECK-GI-NEXT: add sp, sp, #32
+; CHECK-GI-NEXT: ret
+ %val32 = sitofp <2 x i32> %src32 to <2 x fp128>
+ ret <2 x fp128> %val32
}
-define dso_local void @test_extend() {
-; CHECK-LABEL: test_extend:
-; CHECK: // %bb.0:
-; CHECK-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: .cfi_offset w19, -8
-; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: adrp x19, lhs
-; CHECK-NEXT: adrp x8, varhalf
-; CHECK-NEXT: ldr h0, [x8, :lo12:varhalf]
-; CHECK-NEXT: bl __extendhftf2
-; CHECK-NEXT: str q0, [x19, :lo12:lhs]
-; CHECK-NEXT: adrp x8, varfloat
-; CHECK-NEXT: ldr s0, [x8, :lo12:varfloat]
-; CHECK-NEXT: bl __extendsftf2
-; CHECK-NEXT: str q0, [x19, :lo12:lhs]
-; CHECK-NEXT: adrp x8, vardouble
-; CHECK-NEXT: ldr d0, [x8, :lo12:vardouble]
-; CHECK-NEXT: bl __extenddftf2
-; CHECK-NEXT: str q0, [x19, :lo12:lhs]
-; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
-; CHECK-NEXT: ret
+define <2 x fp128> @vec_sitofp_64(<2 x i64> %src64) {
+; CHECK-SD-LABEL: vec_sitofp_64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sub sp, sp, #48
+; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 48
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: fmov x0, d0
+; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT: bl __floatditf
+; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT: mov x0, v0.d[1]
+; CHECK-SD-NEXT: bl __floatditf
+; CHECK-SD-NEXT: mov v1.16b, v0.16b
+; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-SD-NEXT: add sp, sp, #48
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: vec_sitofp_64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sub sp, sp, #32
+; CHECK-GI-NEXT: str d8, [sp, #16] // 8-byte Folded Spill
+; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 32
+; CHECK-GI-NEXT: .cfi_offset w30, -8
+; CHECK-GI-NEXT: .cfi_offset b8, -16
+; CHECK-GI-NEXT: fmov x0, d0
+; CHECK-GI-NEXT: mov d8, v0.d[1]
+; CHECK-GI-NEXT: bl __floatditf
+; CHECK-GI-NEXT: fmov x0, d8
+; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT: bl __floatditf
+; CHECK-GI-NEXT: mov v1.16b, v0.16b
+; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload
+; CHECK-GI-NEXT: ldr d8, [sp, #16] // 8-byte Folded Reload
+; CHECK-GI-NEXT: add sp, sp, #32
+; CHECK-GI-NEXT: ret
+ %val64 = sitofp <2 x i64> %src64 to <2 x fp128>
+ ret <2 x fp128> %val64
+}
- %val = load fp128, ptr @lhs, align 16
+define <2 x fp128> @vec_uitofp_32(<2 x i32> %src32) {
+; CHECK-SD-LABEL: vec_uitofp_32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sub sp, sp, #32
+; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: fmov w0, s0
+; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT: bl __floatunsitf
+; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT: mov w0, v1.s[1]
+; CHECK-SD-NEXT: bl __floatunsitf
+; CHECK-SD-NEXT: mov v1.16b, v0.16b
+; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-SD-NEXT: add sp, sp, #32
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: vec_uitofp_32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sub sp, sp, #32
+; CHECK-GI-NEXT: str d8, [sp, #16] // 8-byte Folded Spill
+; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 32
+; CHECK-GI-NEXT: .cfi_offset w30, -8
+; CHECK-GI-NEXT: .cfi_offset b8, -16
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: fmov w0, s0
+; CHECK-GI-NEXT: mov s8, v0.s[1]
+; CHECK-GI-NEXT: bl __floatunsitf
+; CHECK-GI-NEXT: fmov w0, s8
+; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT: bl __floatunsitf
+; CHECK-GI-NEXT: mov v1.16b, v0.16b
+; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload
+; CHECK-GI-NEXT: ldr d8, [sp, #16] // 8-byte Folded Reload
+; CHECK-GI-NEXT: add sp, sp, #32
+; CHECK-GI-NEXT: ret
+ %val32 = uitofp <2 x i32> %src32 to <2 x fp128>
+ ret <2 x fp128> %val32
+}
- %half = load half, ptr @varhalf
- %fromhalf = fpext half %half to fp128
- store volatile fp128 %fromhalf, ptr @lhs, align 16
+define <2 x fp128> @vec_uitofp_64(<2 x i64> %src64) {
+; CHECK-SD-LABEL: vec_uitofp_64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sub sp, sp, #48
+; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 48
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: fmov x0, d0
+; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT: bl __floatunditf
+; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT: mov x0, v0.d[1]
+; CHECK-SD-NEXT: bl __floatunditf
+; CHECK-SD-NEXT: mov v1.16b, v0.16b
+; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-SD-NEXT: add sp, sp, #48
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: vec_uitofp_64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sub sp, sp, #32
+; CHECK-GI-NEXT: str d8, [sp, #16] // 8-byte Folded Spill
+; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 32
+; CHECK-GI-NEXT: .cfi_offset w30, -8
+; CHECK-GI-NEXT: .cfi_offset b8, -16
+; CHECK-GI-NEXT: fmov x0, d0
+; CHECK-GI-NEXT: mov d8, v0.d[1]
+; CHECK-GI-NEXT: bl __floatunditf
+; CHECK-GI-NEXT: fmov x0, d8
+; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT: bl __floatunditf
+; CHECK-GI-NEXT: mov v1.16b, v0.16b
+; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload
+; CHECK-GI-NEXT: ldr d8, [sp, #16] // 8-byte Folded Reload
+; CHECK-GI-NEXT: add sp, sp, #32
+; CHECK-GI-NEXT: ret
+ %val64 = uitofp <2 x i64> %src64 to <2 x fp128>
+ ret <2 x fp128> %val64
+}
- %float = load float, ptr @varfloat
- %fromfloat = fpext float %float to fp128
- store volatile fp128 %fromfloat, ptr @lhs, align 16
+define <2 x i1> @vec_setcc1(<2 x fp128> %lhs, <2 x fp128> %rhs) {
+; CHECK-SD-LABEL: vec_setcc1:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sub sp, sp, #48
+; CHECK-SD-NEXT: str d8, [sp, #32] // 8-byte Folded Spill
+; CHECK-SD-NEXT: str x30, [sp, #40] // 8-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 48
+; CHECK-SD-NEXT: .cfi_offset w30, -8
+; CHECK-SD-NEXT: .cfi_offset b8, -16
+; CHECK-SD-NEXT: stp q0, q2, [sp] // 32-byte Folded Spill
+; CHECK-SD-NEXT: mov v0.16b, v1.16b
+; CHECK-SD-NEXT: mov v1.16b, v3.16b
+; CHECK-SD-NEXT: bl __letf2
+; CHECK-SD-NEXT: cmp w0, #0
+; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload
+; CHECK-SD-NEXT: cset w8, le
+; CHECK-SD-NEXT: sbfx x8, x8, #0, #1
+; CHECK-SD-NEXT: fmov d8, x8
+; CHECK-SD-NEXT: bl __letf2
+; CHECK-SD-NEXT: cmp w0, #0
+; CHECK-SD-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload
+; CHECK-SD-NEXT: cset w8, le
+; CHECK-SD-NEXT: sbfx x8, x8, #0, #1
+; CHECK-SD-NEXT: fmov d0, x8
+; CHECK-SD-NEXT: zip1 v0.2s, v0.2s, v8.2s
+; CHECK-SD-NEXT: ldr d8, [sp, #32] // 8-byte Folded Reload
+; CHECK-SD-NEXT: add sp, sp, #48
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: vec_setcc1:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sub sp, sp, #48
+; CHECK-GI-NEXT: stp x30, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 48
+; CHECK-GI-NEXT: .cfi_offset w19, -8
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: stp q3, q1, [sp] // 32-byte Folded Spill
+; CHECK-GI-NEXT: mov v1.16b, v2.16b
+; CHECK-GI-NEXT: bl __letf2
+; CHECK-GI-NEXT: ldp q1, q0, [sp] // 32-byte Folded Reload
+; CHECK-GI-NEXT: cmp w0, #0
+; CHECK-GI-NEXT: cset w19, le
+; CHECK-GI-NEXT: bl __letf2
+; CHECK-GI-NEXT: mov v0.s[0], w19
+; CHECK-GI-NEXT: cmp w0, #0
+; CHECK-GI-NEXT: cset w8, le
+; CHECK-GI-NEXT: ldp x30, x19, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT: mov v0.s[1], w8
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: add sp, sp, #48
+; CHECK-GI-NEXT: ret
+ %val = fcmp ole <2 x fp128> %lhs, %rhs
+ ret <2 x i1> %val
+}
- %double = load double, ptr @vardouble
- %fromdouble = fpext double %double to fp128
- store volatile fp128 %fromdouble, ptr @lhs, align 16
+define <2 x i1> @vec_setcc2(<2 x fp128> %lhs, <2 x fp128> %rhs) {
+; CHECK-SD-LABEL: vec_setcc2:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sub sp, sp, #48
+; CHECK-SD-NEXT: str d8, [sp, #32] // 8-byte Folded Spill
+; CHECK-SD-NEXT: str x30, [sp, #40] // 8-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 48
+; CHECK-SD-NEXT: .cfi_offset w30, -8
+; CHECK-SD-NEXT: .cfi_offset b8, -16
+; CHECK-SD-NEXT: stp q0, q2, [sp] // 32-byte Folded Spill
+; CHECK-SD-NEXT: mov v0.16b, v1.16b
+; CHECK-SD-NEXT: mov v1.16b, v3.16b
+; CHECK-SD-NEXT: bl __letf2
+; CHECK-SD-NEXT: cmp w0, #0
+; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload
+; CHECK-SD-NEXT: cset w8, gt
+; CHECK-SD-NEXT: sbfx x8, x8, #0, #1
+; CHECK-SD-NEXT: fmov d8, x8
+; CHECK-SD-NEXT: bl __letf2
+; CHECK-SD-NEXT: cmp w0, #0
+; CHECK-SD-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload
+; CHECK-SD-NEXT: cset w8, gt
+; CHECK-SD-NEXT: sbfx x8, x8, #0, #1
+; CHECK-SD-NEXT: fmov d0, x8
+; CHECK-SD-NEXT: zip1 v0.2s, v0.2s, v8.2s
+; CHECK-SD-NEXT: ldr d8, [sp, #32] // 8-byte Folded Reload
+; CHECK-SD-NEXT: add sp, sp, #48
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: vec_setcc2:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sub sp, sp, #48
+; CHECK-GI-NEXT: stp x30, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 48
+; CHECK-GI-NEXT: .cfi_offset w19, -8
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: stp q3, q1, [sp] // 32-byte Folded Spill
+; CHECK-GI-NEXT: mov v1.16b, v2.16b
+; CHECK-GI-NEXT: bl __letf2
+; CHECK-GI-NEXT: ldp q1, q0, [sp] // 32-byte Folded Reload
+; CHECK-GI-NEXT: cmp w0, #0
+; CHECK-GI-NEXT: cset w19, gt
+; CHECK-GI-NEXT: bl __letf2
+; CHECK-GI-NEXT: mov v0.s[0], w19
+; CHECK-GI-NEXT: cmp w0, #0
+; CHECK-GI-NEXT: cset w8, gt
+; CHECK-GI-NEXT: ldp x30, x19, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT: mov v0.s[1], w8
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: add sp, sp, #48
+; CHECK-GI-NEXT: ret
+ %val = fcmp ugt <2 x fp128> %lhs, %rhs
+ ret <2 x i1> %val
+}
- ret void
+define <2 x i1> @vec_setcc3(<2 x fp128> %lhs, <2 x fp128> %rhs) {
+; CHECK-SD-LABEL: vec_setcc3:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sub sp, sp, #96
+; CHECK-SD-NEXT: str d8, [sp, #64] // 8-byte Folded Spill
+; CHECK-SD-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 96
+; CHECK-SD-NEXT: .cfi_offset w19, -8
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: .cfi_offset b8, -32
+; CHECK-SD-NEXT: stp q1, q3, [sp] // 32-byte Folded Spill
+; CHECK-SD-NEXT: stp q0, q2, [sp, #32] // 32-byte Folded Spill
+; CHECK-SD-NEXT: mov v0.16b, v1.16b
+; CHECK-SD-NEXT: mov v1.16b, v3.16b
+; CHECK-SD-NEXT: bl __eqtf2
+; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload
+; CHECK-SD-NEXT: mov w19, w0
+; CHECK-SD-NEXT: bl __unordtf2
+; CHECK-SD-NEXT: cmp w0, #0
+; CHECK-SD-NEXT: ldp q0, q1, [sp, #32] // 32-byte Folded Reload
+; CHECK-SD-NEXT: ccmp w19, #0, #4, eq
+; CHECK-SD-NEXT: cset w8, eq
+; CHECK-SD-NEXT: sbfx x8, x8, #0, #1
+; CHECK-SD-NEXT: fmov d8, x8
+; CHECK-SD-NEXT: bl __eqtf2
+; CHECK-SD-NEXT: ldp q0, q1, [sp, #32] // 32-byte Folded Reload
+; CHECK-SD-NEXT: mov w19, w0
+; CHECK-SD-NEXT: bl __unordtf2
+; CHECK-SD-NEXT: cmp w0, #0
+; CHECK-SD-NEXT: ccmp w19, #0, #4, eq
+; CHECK-SD-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-SD-NEXT: cset w8, eq
+; CHECK-SD-NEXT: sbfx x8, x8, #0, #1
+; CHECK-SD-NEXT: fmov d0, x8
+; CHECK-SD-NEXT: zip1 v0.2s, v0.2s, v8.2s
+; CHECK-SD-NEXT: ldr d8, [sp, #64] // 8-byte Folded Reload
+; CHECK-SD-NEXT: add sp, sp, #96
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: vec_setcc3:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sub sp, sp, #96
+; CHECK-GI-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
+; CHECK-GI-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 96
+; CHECK-GI-NEXT: .cfi_offset w19, -8
+; CHECK-GI-NEXT: .cfi_offset w20, -16
+; CHECK-GI-NEXT: .cfi_offset w30, -32
+; CHECK-GI-NEXT: stp q2, q0, [sp] // 32-byte Folded Spill
+; CHECK-GI-NEXT: stp q3, q1, [sp, #32] // 32-byte Folded Spill
+; CHECK-GI-NEXT: mov v1.16b, v2.16b
+; CHECK-GI-NEXT: bl __eqtf2
+; CHECK-GI-NEXT: ldp q1, q0, [sp] // 32-byte Folded Reload
+; CHECK-GI-NEXT: cmp w0, #0
+; CHECK-GI-NEXT: cset w19, eq
+; CHECK-GI-NEXT: bl __unordtf2
+; CHECK-GI-NEXT: ldp q1, q0, [sp, #32] // 32-byte Folded Reload
+; CHECK-GI-NEXT: cmp w0, #0
+; CHECK-GI-NEXT: cset w8, ne
+; CHECK-GI-NEXT: orr w19, w19, w8
+; CHECK-GI-NEXT: bl __eqtf2
+; CHECK-GI-NEXT: ldp q1, q0, [sp, #32] // 32-byte Folded Reload
+; CHECK-GI-NEXT: cmp w0, #0
+; CHECK-GI-NEXT: cset w20, eq
+; CHECK-GI-NEXT: bl __unordtf2
+; CHECK-GI-NEXT: mov v0.s[0], w19
+; CHECK-GI-NEXT: cmp w0, #0
+; CHECK-GI-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
+; CHECK-GI-NEXT: cset w8, ne
+; CHECK-GI-NEXT: orr w8, w20, w8
+; CHECK-GI-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-GI-NEXT: mov v0.s[1], w8
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: add sp, sp, #96
+; CHECK-GI-NEXT: ret
+ %val = fcmp ueq <2 x fp128> %lhs, %rhs
+ ret <2 x i1> %val
}
-define fp128 @test_neg(fp128 %in) {
-; CHECK-LABEL: test_neg:
+define <2 x fp128> @vec_select(<2 x i1> %cond, <2 x fp128> %lhs, <2 x fp128> %rhs) {
+; CHECK-SD-LABEL: vec_select:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: fmov w8, s0
+; CHECK-SD-NEXT: tst w8, #0x1
+; CHECK-SD-NEXT: b.eq .LBB40_2
+; CHECK-SD-NEXT: // %bb.1:
+; CHECK-SD-NEXT: mov v3.16b, v1.16b
+; CHECK-SD-NEXT: .LBB40_2:
+; CHECK-SD-NEXT: mov w8, v0.s[1]
+; CHECK-SD-NEXT: tst w8, #0x1
+; CHECK-SD-NEXT: b.eq .LBB40_4
+; CHECK-SD-NEXT: // %bb.3:
+; CHECK-SD-NEXT: mov v4.16b, v2.16b
+; CHECK-SD-NEXT: .LBB40_4:
+; CHECK-SD-NEXT: mov v0.16b, v3.16b
+; CHECK-SD-NEXT: mov v1.16b, v4.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: vec_select:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: mov w8, v0.s[1]
+; CHECK-GI-NEXT: fmov w9, s0
+; CHECK-GI-NEXT: mov d5, v1.d[1]
+; CHECK-GI-NEXT: mov d6, v3.d[1]
+; CHECK-GI-NEXT: and w9, w9, #0x1
+; CHECK-GI-NEXT: tst w9, #0x1
+; CHECK-GI-NEXT: and w8, w8, #0x1
+; CHECK-GI-NEXT: fcsel d0, d1, d3, ne
+; CHECK-GI-NEXT: fcsel d3, d5, d6, ne
+; CHECK-GI-NEXT: tst w8, #0x1
+; CHECK-GI-NEXT: mov d1, v2.d[1]
+; CHECK-GI-NEXT: mov d5, v4.d[1]
+; CHECK-GI-NEXT: fcsel d2, d2, d4, ne
+; CHECK-GI-NEXT: fmov x8, d0
+; CHECK-GI-NEXT: fmov x9, d2
+; CHECK-GI-NEXT: fcsel d2, d1, d5, ne
+; CHECK-GI-NEXT: mov v0.d[0], x8
+; CHECK-GI-NEXT: fmov x8, d3
+; CHECK-GI-NEXT: mov v1.d[0], x9
+; CHECK-GI-NEXT: fmov x9, d2
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: mov v1.d[1], x9
+; CHECK-GI-NEXT: ret
+ %val = select <2 x i1> %cond, <2 x fp128> %lhs, <2 x fp128> %rhs
+ ret <2 x fp128> %val
+}
+
+define <2 x half> @vec_round_f16(<2 x fp128> %val) {
+; CHECK-SD-LABEL: vec_round_f16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sub sp, sp, #48
+; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 48
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT: mov v0.16b, v1.16b
+; CHECK-SD-NEXT: bl __trunctfhf2
+; CHECK-SD-NEXT: // kill: def $h0 killed $h0 def $q0
+; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT: bl __trunctfhf2
+; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT: // kill: def $h0 killed $h0 def $q0
+; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-SD-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: add sp, sp, #48
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: vec_round_f16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sub sp, sp, #64
+; CHECK-GI-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 64
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: mov v2.d[0], x8
+; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT: mov v2.d[1], x8
+; CHECK-GI-NEXT: str q2, [sp, #32] // 16-byte Folded Spill
+; CHECK-GI-NEXT: bl __trunctfhf2
+; CHECK-GI-NEXT: // kill: def $h0 killed $h0 def $q0
+; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT: bl __trunctfhf2
+; CHECK-GI-NEXT: // kill: def $h0 killed $h0 def $q0
+; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT: bl __trunctfhf2
+; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT: bl __trunctfhf2
+; CHECK-GI-NEXT: ldp q1, q0, [sp] // 32-byte Folded Reload
+; CHECK-GI-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
+; CHECK-GI-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: add sp, sp, #64
+; CHECK-GI-NEXT: ret
+ %dst = fptrunc <2 x fp128> %val to <2 x half>
+ ret <2 x half> %dst
+}
+
+define <2 x float> @vec_round_f32(<2 x fp128> %val) {
+; CHECK-SD-LABEL: vec_round_f32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sub sp, sp, #48
+; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 48
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT: mov v0.16b, v1.16b
+; CHECK-SD-NEXT: bl __trunctfsf2
+; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT: bl __trunctfsf2
+; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-SD-NEXT: mov v0.s[1], v1.s[0]
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: add sp, sp, #48
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: vec_round_f32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sub sp, sp, #48
+; CHECK-GI-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 48
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT: bl __trunctfsf2
+; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT: bl __trunctfsf2
+; CHECK-GI-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-GI-NEXT: mov v1.s[1], v0.s[0]
+; CHECK-GI-NEXT: fmov d0, d1
+; CHECK-GI-NEXT: add sp, sp, #48
+; CHECK-GI-NEXT: ret
+ %dst = fptrunc <2 x fp128> %val to <2 x float>
+ ret <2 x float> %dst
+}
+
+define <2 x double> @vec_round_f64(<2 x fp128> %val) {
+; CHECK-SD-LABEL: vec_round_f64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sub sp, sp, #48
+; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 48
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT: mov v0.16b, v1.16b
+; CHECK-SD-NEXT: bl __trunctfdf2
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT: bl __trunctfdf2
+; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT: add sp, sp, #48
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: vec_round_f64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sub sp, sp, #48
+; CHECK-GI-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 48
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT: bl __trunctfdf2
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT: bl __trunctfdf2
+; CHECK-GI-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-GI-NEXT: mov v1.d[1], v0.d[0]
+; CHECK-GI-NEXT: mov v0.16b, v1.16b
+; CHECK-GI-NEXT: add sp, sp, #48
+; CHECK-GI-NEXT: ret
+ %dst = fptrunc <2 x fp128> %val to <2 x double>
+ ret <2 x double> %dst
+}
+
+define <2 x fp128> @vec_extend_f16(<2 x half> %val) {
+; CHECK-SD-LABEL: vec_extend_f16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sub sp, sp, #32
+; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT: // kill: def $h0 killed $h0 killed $q0
+; CHECK-SD-NEXT: bl __extendhftf2
+; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT: mov h1, v1.h[1]
+; CHECK-SD-NEXT: fmov s0, s1
+; CHECK-SD-NEXT: bl __extendhftf2
+; CHECK-SD-NEXT: mov v1.16b, v0.16b
+; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-SD-NEXT: add sp, sp, #32
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: vec_extend_f16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sub sp, sp, #32
+; CHECK-GI-NEXT: str d8, [sp, #16] // 8-byte Folded Spill
+; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 32
+; CHECK-GI-NEXT: .cfi_offset w30, -8
+; CHECK-GI-NEXT: .cfi_offset b8, -16
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: mov h8, v0.h[1]
+; CHECK-GI-NEXT: // kill: def $h0 killed $h0 killed $q0
+; CHECK-GI-NEXT: bl __extendhftf2
+; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT: fmov s0, s8
+; CHECK-GI-NEXT: bl __extendhftf2
+; CHECK-GI-NEXT: mov v1.16b, v0.16b
+; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload
+; CHECK-GI-NEXT: ldr d8, [sp, #16] // 8-byte Folded Reload
+; CHECK-GI-NEXT: add sp, sp, #32
+; CHECK-GI-NEXT: ret
+ %dst = fpext <2 x half> %val to <2 x fp128>
+ ret <2 x fp128> %dst
+}
+
+define <2 x fp128> @vec_extend_f32(<2 x float> %val) {
+; CHECK-SD-LABEL: vec_extend_f32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sub sp, sp, #32
+; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0
+; CHECK-SD-NEXT: bl __extendsftf2
+; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT: mov s1, v1.s[1]
+; CHECK-SD-NEXT: fmov s0, s1
+; CHECK-SD-NEXT: bl __extendsftf2
+; CHECK-SD-NEXT: mov v1.16b, v0.16b
+; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-SD-NEXT: add sp, sp, #32
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: vec_extend_f32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sub sp, sp, #32
+; CHECK-GI-NEXT: str d8, [sp, #16] // 8-byte Folded Spill
+; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 32
+; CHECK-GI-NEXT: .cfi_offset w30, -8
+; CHECK-GI-NEXT: .cfi_offset b8, -16
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: mov s8, v0.s[1]
+; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0
+; CHECK-GI-NEXT: bl __extendsftf2
+; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT: fmov s0, s8
+; CHECK-GI-NEXT: bl __extendsftf2
+; CHECK-GI-NEXT: mov v1.16b, v0.16b
+; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload
+; CHECK-GI-NEXT: ldr d8, [sp, #16] // 8-byte Folded Reload
+; CHECK-GI-NEXT: add sp, sp, #32
+; CHECK-GI-NEXT: ret
+ %dst = fpext <2 x float> %val to <2 x fp128>
+ ret <2 x fp128> %dst
+}
+
+define <2 x fp128> @vec_extend_f64(<2 x double> %val) {
+; CHECK-SD-LABEL: vec_extend_f64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sub sp, sp, #48
+; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 48
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: bl __extenddftf2
+; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT: mov d0, v0.d[1]
+; CHECK-SD-NEXT: bl __extenddftf2
+; CHECK-SD-NEXT: mov v1.16b, v0.16b
+; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-SD-NEXT: add sp, sp, #48
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: vec_extend_f64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sub sp, sp, #32
+; CHECK-GI-NEXT: str d8, [sp, #16] // 8-byte Folded Spill
+; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 32
+; CHECK-GI-NEXT: .cfi_offset w30, -8
+; CHECK-GI-NEXT: .cfi_offset b8, -16
+; CHECK-GI-NEXT: mov d8, v0.d[1]
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: bl __extenddftf2
+; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-GI-NEXT: fmov d0, d8
+; CHECK-GI-NEXT: bl __extenddftf2
+; CHECK-GI-NEXT: mov v1.16b, v0.16b
+; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload
+; CHECK-GI-NEXT: ldr d8, [sp, #16] // 8-byte Folded Reload
+; CHECK-GI-NEXT: add sp, sp, #32
+; CHECK-GI-NEXT: ret
+ %dst = fpext <2 x double> %val to <2 x fp128>
+ ret <2 x fp128> %dst
+}
+
+define <2 x fp128> @vec_neg_sub(<2 x fp128> %in) {
+; CHECK-LABEL: vec_neg_sub:
; CHECK: // %bb.0:
-; CHECK-NEXT: str q0, [sp, #-16]!
-; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sub sp, sp, #64
+; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: str q1, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: mov v1.16b, v0.16b
+; CHECK-NEXT: adrp x8, .LCPI47_0
+; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI47_0]
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: bl __subtf3
+; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: bl __subtf3
+; CHECK-NEXT: mov v1.16b, v0.16b
+; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #64
+; CHECK-NEXT: ret
+ %ret = fsub <2 x fp128> zeroinitializer, %in
+ ret <2 x fp128> %ret
+}
+
+define <2 x fp128> @vec_neg(<2 x fp128> %in) {
+; CHECK-LABEL: vec_neg:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp q0, q1, [sp, #-32]!
+; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: ldrb w8, [sp, #15]
; CHECK-NEXT: eor w8, w8, #0x80
; CHECK-NEXT: strb w8, [sp, #15]
-; CHECK-NEXT: ldr q0, [sp], #16
+; CHECK-NEXT: ldrb w8, [sp, #31]
+; CHECK-NEXT: eor w8, w8, #0x80
+; CHECK-NEXT: strb w8, [sp, #31]
+; CHECK-NEXT: ldp q0, q1, [sp], #32
; CHECK-NEXT: ret
-
-;; We convert this to fneg, and target-independent code expands it with
-;; integer operations.
- %ret = fsub fp128 0xL00000000000000008000000000000000, %in
- ret fp128 %ret
-
+ %ret = fneg <2 x fp128> %in
+ ret <2 x fp128> %ret
}
More information about the llvm-commits
mailing list