[llvm] 2e3f252 - [AArch64] Add global isel coverage for fp16 tests and strict-fp. NFC
David Green via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 29 08:54:56 PDT 2025
Author: David Green
Date: 2025-09-29T16:54:49+01:00
New Revision: 2e3f2523e624a4a922c386f6f1264c19f25a2e26
URL: https://github.com/llvm/llvm-project/commit/2e3f2523e624a4a922c386f6f1264c19f25a2e26
DIFF: https://github.com/llvm/llvm-project/commit/2e3f2523e624a4a922c386f6f1264c19f25a2e26.diff
LOG: [AArch64] Add global isel coverage for fp16 tests and strict-fp. NFC
Added:
Modified:
llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll
llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll
llvm/test/CodeGen/AArch64/strict-fp-opt.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll b/llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll
index 8bc3497ad3c3c..6233ce743b706 100644
--- a/llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll
@@ -1,20 +1,30 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc < %s -mtriple=aarch64 -mattr=-fullfp16 | FileCheck %s --check-prefixes=CHECK-COMMON,CHECK-CVT
-; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK-COMMON,CHECK-FP16
+; RUN: llc < %s -mtriple=aarch64 -mattr=-fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-CVT,CHECK-CVT-SD
+; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-FP16,CHECK-FP16-SD
+; RUN: llc < %s -mtriple=aarch64 -mattr=-fullfp16 -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-CVT,CHECK-CVT-GI
+; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-FP16,CHECK-FP16-GI
define <4 x half> @add_h(<4 x half> %a, <4 x half> %b) {
-; CHECK-CVT-LABEL: add_h:
-; CHECK-CVT: // %bb.0: // %entry
-; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h
-; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: fadd v0.4s, v0.4s, v1.4s
-; CHECK-CVT-NEXT: fcvtn v0.4h, v0.4s
-; CHECK-CVT-NEXT: ret
+; CHECK-CVT-SD-LABEL: add_h:
+; CHECK-CVT-SD: // %bb.0: // %entry
+; CHECK-CVT-SD-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-SD-NEXT: fadd v0.4s, v0.4s, v1.4s
+; CHECK-CVT-SD-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-CVT-SD-NEXT: ret
;
; CHECK-FP16-LABEL: add_h:
; CHECK-FP16: // %bb.0: // %entry
; CHECK-FP16-NEXT: fadd v0.4h, v0.4h, v1.4h
; CHECK-FP16-NEXT: ret
+;
+; CHECK-CVT-GI-LABEL: add_h:
+; CHECK-CVT-GI: // %bb.0: // %entry
+; CHECK-CVT-GI-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-GI-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-CVT-GI-NEXT: fadd v0.4s, v0.4s, v1.4s
+; CHECK-CVT-GI-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-CVT-GI-NEXT: ret
entry:
%0 = fadd <4 x half> %a, %b
@@ -22,28 +32,54 @@ entry:
}
define <4 x half> @build_h4(<4 x half> %a) {
-; CHECK-COMMON-LABEL: build_h4:
-; CHECK-COMMON: // %bb.0: // %entry
-; CHECK-COMMON-NEXT: mov w8, #15565 // =0x3ccd
-; CHECK-COMMON-NEXT: dup v0.4h, w8
-; CHECK-COMMON-NEXT: ret
+; CHECK-CVT-SD-LABEL: build_h4:
+; CHECK-CVT-SD: // %bb.0: // %entry
+; CHECK-CVT-SD-NEXT: mov w8, #15565 // =0x3ccd
+; CHECK-CVT-SD-NEXT: dup v0.4h, w8
+; CHECK-CVT-SD-NEXT: ret
+;
+; CHECK-FP16-SD-LABEL: build_h4:
+; CHECK-FP16-SD: // %bb.0: // %entry
+; CHECK-FP16-SD-NEXT: mov w8, #15565 // =0x3ccd
+; CHECK-FP16-SD-NEXT: dup v0.4h, w8
+; CHECK-FP16-SD-NEXT: ret
+;
+; CHECK-CVT-GI-LABEL: build_h4:
+; CHECK-CVT-GI: // %bb.0: // %entry
+; CHECK-CVT-GI-NEXT: adrp x8, .LCPI1_0
+; CHECK-CVT-GI-NEXT: ldr d0, [x8, :lo12:.LCPI1_0]
+; CHECK-CVT-GI-NEXT: ret
+;
+; CHECK-FP16-GI-LABEL: build_h4:
+; CHECK-FP16-GI: // %bb.0: // %entry
+; CHECK-FP16-GI-NEXT: adrp x8, .LCPI1_0
+; CHECK-FP16-GI-NEXT: ldr d0, [x8, :lo12:.LCPI1_0]
+; CHECK-FP16-GI-NEXT: ret
entry:
ret <4 x half> <half 0xH3CCD, half 0xH3CCD, half 0xH3CCD, half 0xH3CCD>
}
define <4 x half> @sub_h(<4 x half> %a, <4 x half> %b) {
-; CHECK-CVT-LABEL: sub_h:
-; CHECK-CVT: // %bb.0: // %entry
-; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h
-; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: fsub v0.4s, v0.4s, v1.4s
-; CHECK-CVT-NEXT: fcvtn v0.4h, v0.4s
-; CHECK-CVT-NEXT: ret
+; CHECK-CVT-SD-LABEL: sub_h:
+; CHECK-CVT-SD: // %bb.0: // %entry
+; CHECK-CVT-SD-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-SD-NEXT: fsub v0.4s, v0.4s, v1.4s
+; CHECK-CVT-SD-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-CVT-SD-NEXT: ret
;
; CHECK-FP16-LABEL: sub_h:
; CHECK-FP16: // %bb.0: // %entry
; CHECK-FP16-NEXT: fsub v0.4h, v0.4h, v1.4h
; CHECK-FP16-NEXT: ret
+;
+; CHECK-CVT-GI-LABEL: sub_h:
+; CHECK-CVT-GI: // %bb.0: // %entry
+; CHECK-CVT-GI-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-GI-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-CVT-GI-NEXT: fsub v0.4s, v0.4s, v1.4s
+; CHECK-CVT-GI-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-CVT-GI-NEXT: ret
entry:
%0 = fsub <4 x half> %a, %b
@@ -51,18 +87,26 @@ entry:
}
define <4 x half> @mul_h(<4 x half> %a, <4 x half> %b) {
-; CHECK-CVT-LABEL: mul_h:
-; CHECK-CVT: // %bb.0: // %entry
-; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h
-; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: fmul v0.4s, v0.4s, v1.4s
-; CHECK-CVT-NEXT: fcvtn v0.4h, v0.4s
-; CHECK-CVT-NEXT: ret
+; CHECK-CVT-SD-LABEL: mul_h:
+; CHECK-CVT-SD: // %bb.0: // %entry
+; CHECK-CVT-SD-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-SD-NEXT: fmul v0.4s, v0.4s, v1.4s
+; CHECK-CVT-SD-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-CVT-SD-NEXT: ret
;
; CHECK-FP16-LABEL: mul_h:
; CHECK-FP16: // %bb.0: // %entry
; CHECK-FP16-NEXT: fmul v0.4h, v0.4h, v1.4h
; CHECK-FP16-NEXT: ret
+;
+; CHECK-CVT-GI-LABEL: mul_h:
+; CHECK-CVT-GI: // %bb.0: // %entry
+; CHECK-CVT-GI-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-GI-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-CVT-GI-NEXT: fmul v0.4s, v0.4s, v1.4s
+; CHECK-CVT-GI-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-CVT-GI-NEXT: ret
entry:
%0 = fmul <4 x half> %a, %b
@@ -70,18 +114,26 @@ entry:
}
define <4 x half> @div_h(<4 x half> %a, <4 x half> %b) {
-; CHECK-CVT-LABEL: div_h:
-; CHECK-CVT: // %bb.0: // %entry
-; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h
-; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: fdiv v0.4s, v0.4s, v1.4s
-; CHECK-CVT-NEXT: fcvtn v0.4h, v0.4s
-; CHECK-CVT-NEXT: ret
+; CHECK-CVT-SD-LABEL: div_h:
+; CHECK-CVT-SD: // %bb.0: // %entry
+; CHECK-CVT-SD-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-SD-NEXT: fdiv v0.4s, v0.4s, v1.4s
+; CHECK-CVT-SD-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-CVT-SD-NEXT: ret
;
; CHECK-FP16-LABEL: div_h:
; CHECK-FP16: // %bb.0: // %entry
; CHECK-FP16-NEXT: fdiv v0.4h, v0.4h, v1.4h
; CHECK-FP16-NEXT: ret
+;
+; CHECK-CVT-GI-LABEL: div_h:
+; CHECK-CVT-GI: // %bb.0: // %entry
+; CHECK-CVT-GI-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-GI-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-CVT-GI-NEXT: fdiv v0.4s, v0.4s, v1.4s
+; CHECK-CVT-GI-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-CVT-GI-NEXT: ret
entry:
%0 = fdiv <4 x half> %a, %b
@@ -89,92 +141,162 @@ entry:
}
define <4 x half> @load_h(ptr %a) {
-; CHECK-COMMON-LABEL: load_h:
-; CHECK-COMMON: // %bb.0: // %entry
-; CHECK-COMMON-NEXT: ldr d0, [x0]
-; CHECK-COMMON-NEXT: ret
+; CHECK-LABEL: load_h:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: ret
entry:
%0 = load <4 x half>, ptr %a, align 4
ret <4 x half> %0
}
define void @store_h(ptr %a, <4 x half> %b) {
-; CHECK-COMMON-LABEL: store_h:
-; CHECK-COMMON: // %bb.0: // %entry
-; CHECK-COMMON-NEXT: str d0, [x0]
-; CHECK-COMMON-NEXT: ret
+; CHECK-LABEL: store_h:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str d0, [x0]
+; CHECK-NEXT: ret
entry:
store <4 x half> %b, ptr %a, align 4
ret void
}
define <4 x half> @s_to_h(<4 x float> %a) {
-; CHECK-COMMON-LABEL: s_to_h:
-; CHECK-COMMON: // %bb.0:
-; CHECK-COMMON-NEXT: fcvtn v0.4h, v0.4s
-; CHECK-COMMON-NEXT: ret
+; CHECK-LABEL: s_to_h:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-NEXT: ret
%1 = fptrunc <4 x float> %a to <4 x half>
ret <4 x half> %1
}
define <4 x half> @d_to_h(<4 x double> %a) {
-; CHECK-COMMON-LABEL: d_to_h:
-; CHECK-COMMON: // %bb.0:
-; CHECK-COMMON-NEXT: fcvtxn v0.2s, v0.2d
-; CHECK-COMMON-NEXT: fcvtxn2 v0.4s, v1.2d
-; CHECK-COMMON-NEXT: fcvtn v0.4h, v0.4s
-; CHECK-COMMON-NEXT: ret
+; CHECK-CVT-SD-LABEL: d_to_h:
+; CHECK-CVT-SD: // %bb.0:
+; CHECK-CVT-SD-NEXT: fcvtxn v0.2s, v0.2d
+; CHECK-CVT-SD-NEXT: fcvtxn2 v0.4s, v1.2d
+; CHECK-CVT-SD-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-CVT-SD-NEXT: ret
+;
+; CHECK-FP16-SD-LABEL: d_to_h:
+; CHECK-FP16-SD: // %bb.0:
+; CHECK-FP16-SD-NEXT: fcvtxn v0.2s, v0.2d
+; CHECK-FP16-SD-NEXT: fcvtxn2 v0.4s, v1.2d
+; CHECK-FP16-SD-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-FP16-SD-NEXT: ret
+;
+; CHECK-CVT-GI-LABEL: d_to_h:
+; CHECK-CVT-GI: // %bb.0:
+; CHECK-CVT-GI-NEXT: mov d2, v0.d[1]
+; CHECK-CVT-GI-NEXT: fcvt h0, d0
+; CHECK-CVT-GI-NEXT: mov d3, v1.d[1]
+; CHECK-CVT-GI-NEXT: fcvt h1, d1
+; CHECK-CVT-GI-NEXT: fcvt h2, d2
+; CHECK-CVT-GI-NEXT: mov v0.h[1], v2.h[0]
+; CHECK-CVT-GI-NEXT: fcvt h2, d3
+; CHECK-CVT-GI-NEXT: mov v0.h[2], v1.h[0]
+; CHECK-CVT-GI-NEXT: mov v0.h[3], v2.h[0]
+; CHECK-CVT-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-CVT-GI-NEXT: ret
+;
+; CHECK-FP16-GI-LABEL: d_to_h:
+; CHECK-FP16-GI: // %bb.0:
+; CHECK-FP16-GI-NEXT: mov d2, v0.d[1]
+; CHECK-FP16-GI-NEXT: fcvt h0, d0
+; CHECK-FP16-GI-NEXT: mov d3, v1.d[1]
+; CHECK-FP16-GI-NEXT: fcvt h1, d1
+; CHECK-FP16-GI-NEXT: fcvt h2, d2
+; CHECK-FP16-GI-NEXT: mov v0.h[1], v2.h[0]
+; CHECK-FP16-GI-NEXT: fcvt h2, d3
+; CHECK-FP16-GI-NEXT: mov v0.h[2], v1.h[0]
+; CHECK-FP16-GI-NEXT: mov v0.h[3], v2.h[0]
+; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-FP16-GI-NEXT: ret
%1 = fptrunc <4 x double> %a to <4 x half>
ret <4 x half> %1
}
define <4 x float> @h_to_s(<4 x half> %a) {
-; CHECK-COMMON-LABEL: h_to_s:
-; CHECK-COMMON: // %bb.0:
-; CHECK-COMMON-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-COMMON-NEXT: ret
+; CHECK-LABEL: h_to_s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-NEXT: ret
%1 = fpext <4 x half> %a to <4 x float>
ret <4 x float> %1
}
define <4 x double> @h_to_d(<4 x half> %a) {
-; CHECK-COMMON-LABEL: h_to_d:
-; CHECK-COMMON: // %bb.0:
-; CHECK-COMMON-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-COMMON-NEXT: fcvtl2 v1.2d, v0.4s
-; CHECK-COMMON-NEXT: fcvtl v0.2d, v0.2s
-; CHECK-COMMON-NEXT: ret
+; CHECK-CVT-SD-LABEL: h_to_d:
+; CHECK-CVT-SD: // %bb.0:
+; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-SD-NEXT: fcvtl2 v1.2d, v0.4s
+; CHECK-CVT-SD-NEXT: fcvtl v0.2d, v0.2s
+; CHECK-CVT-SD-NEXT: ret
+;
+; CHECK-FP16-SD-LABEL: h_to_d:
+; CHECK-FP16-SD: // %bb.0:
+; CHECK-FP16-SD-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-FP16-SD-NEXT: fcvtl2 v1.2d, v0.4s
+; CHECK-FP16-SD-NEXT: fcvtl v0.2d, v0.2s
+; CHECK-FP16-SD-NEXT: ret
+;
+; CHECK-CVT-GI-LABEL: h_to_d:
+; CHECK-CVT-GI: // %bb.0:
+; CHECK-CVT-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-CVT-GI-NEXT: mov h1, v0.h[1]
+; CHECK-CVT-GI-NEXT: mov h2, v0.h[2]
+; CHECK-CVT-GI-NEXT: mov h3, v0.h[3]
+; CHECK-CVT-GI-NEXT: fcvt d0, h0
+; CHECK-CVT-GI-NEXT: fcvt d4, h1
+; CHECK-CVT-GI-NEXT: fcvt d1, h2
+; CHECK-CVT-GI-NEXT: fcvt d2, h3
+; CHECK-CVT-GI-NEXT: mov v0.d[1], v4.d[0]
+; CHECK-CVT-GI-NEXT: mov v1.d[1], v2.d[0]
+; CHECK-CVT-GI-NEXT: ret
+;
+; CHECK-FP16-GI-LABEL: h_to_d:
+; CHECK-FP16-GI: // %bb.0:
+; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-FP16-GI-NEXT: mov h1, v0.h[1]
+; CHECK-FP16-GI-NEXT: mov h2, v0.h[2]
+; CHECK-FP16-GI-NEXT: mov h3, v0.h[3]
+; CHECK-FP16-GI-NEXT: fcvt d0, h0
+; CHECK-FP16-GI-NEXT: fcvt d4, h1
+; CHECK-FP16-GI-NEXT: fcvt d1, h2
+; CHECK-FP16-GI-NEXT: fcvt d2, h3
+; CHECK-FP16-GI-NEXT: mov v0.d[1], v4.d[0]
+; CHECK-FP16-GI-NEXT: mov v1.d[1], v2.d[0]
+; CHECK-FP16-GI-NEXT: ret
%1 = fpext <4 x half> %a to <4 x double>
ret <4 x double> %1
}
define <4 x half> @bitcast_i_to_h(float, <4 x i16> %a) {
-; CHECK-COMMON-LABEL: bitcast_i_to_h:
-; CHECK-COMMON: // %bb.0:
-; CHECK-COMMON-NEXT: fmov d0, d1
-; CHECK-COMMON-NEXT: ret
+; CHECK-LABEL: bitcast_i_to_h:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov d0, d1
+; CHECK-NEXT: ret
%2 = bitcast <4 x i16> %a to <4 x half>
ret <4 x half> %2
}
define <4 x i16> @bitcast_h_to_i(float, <4 x half> %a) {
-; CHECK-COMMON-LABEL: bitcast_h_to_i:
-; CHECK-COMMON: // %bb.0:
-; CHECK-COMMON-NEXT: fmov d0, d1
-; CHECK-COMMON-NEXT: ret
+; CHECK-LABEL: bitcast_h_to_i:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov d0, d1
+; CHECK-NEXT: ret
%2 = bitcast <4 x half> %a to <4 x i16>
ret <4 x i16> %2
}
define <4 x half> @sitofp_i8(<4 x i8> %a) #0 {
-; CHECK-CVT-LABEL: sitofp_i8:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: shl v0.4h, v0.4h, #8
-; CHECK-CVT-NEXT: sshr v0.4h, v0.4h, #8
-; CHECK-CVT-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-CVT-NEXT: scvtf v0.4s, v0.4s
-; CHECK-CVT-NEXT: fcvtn v0.4h, v0.4s
-; CHECK-CVT-NEXT: ret
+; CHECK-CVT-SD-LABEL: sitofp_i8:
+; CHECK-CVT-SD: // %bb.0:
+; CHECK-CVT-SD-NEXT: shl v0.4h, v0.4h, #8
+; CHECK-CVT-SD-NEXT: sshr v0.4h, v0.4h, #8
+; CHECK-CVT-SD-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-CVT-SD-NEXT: scvtf v0.4s, v0.4s
+; CHECK-CVT-SD-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-CVT-SD-NEXT: ret
;
; CHECK-FP16-LABEL: sitofp_i8:
; CHECK-FP16: // %bb.0:
@@ -182,6 +304,15 @@ define <4 x half> @sitofp_i8(<4 x i8> %a) #0 {
; CHECK-FP16-NEXT: sshr v0.4h, v0.4h, #8
; CHECK-FP16-NEXT: scvtf v0.4h, v0.4h
; CHECK-FP16-NEXT: ret
+;
+; CHECK-CVT-GI-LABEL: sitofp_i8:
+; CHECK-CVT-GI: // %bb.0:
+; CHECK-CVT-GI-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-CVT-GI-NEXT: shl v0.4s, v0.4s, #24
+; CHECK-CVT-GI-NEXT: sshr v0.4s, v0.4s, #24
+; CHECK-CVT-GI-NEXT: scvtf v0.4s, v0.4s
+; CHECK-CVT-GI-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-CVT-GI-NEXT: ret
%1 = sitofp <4 x i8> %a to <4 x half>
ret <4 x half> %1
}
@@ -204,43 +335,59 @@ define <4 x half> @sitofp_i16(<4 x i16> %a) #0 {
define <4 x half> @sitofp_i32(<4 x i32> %a) #0 {
-; CHECK-COMMON-LABEL: sitofp_i32:
-; CHECK-COMMON: // %bb.0:
-; CHECK-COMMON-NEXT: scvtf v0.4s, v0.4s
-; CHECK-COMMON-NEXT: fcvtn v0.4h, v0.4s
-; CHECK-COMMON-NEXT: ret
+; CHECK-LABEL: sitofp_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: scvtf v0.4s, v0.4s
+; CHECK-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-NEXT: ret
%1 = sitofp <4 x i32> %a to <4 x half>
ret <4 x half> %1
}
define <4 x half> @sitofp_i64(<4 x i64> %a) #0 {
-; CHECK-COMMON-LABEL: sitofp_i64:
-; CHECK-COMMON: // %bb.0:
-; CHECK-COMMON-NEXT: scvtf v0.2d, v0.2d
-; CHECK-COMMON-NEXT: scvtf v1.2d, v1.2d
-; CHECK-COMMON-NEXT: fcvtn v0.2s, v0.2d
-; CHECK-COMMON-NEXT: fcvtn2 v0.4s, v1.2d
-; CHECK-COMMON-NEXT: fcvtn v0.4h, v0.4s
-; CHECK-COMMON-NEXT: ret
+; CHECK-LABEL: sitofp_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: scvtf v0.2d, v0.2d
+; CHECK-NEXT: scvtf v1.2d, v1.2d
+; CHECK-NEXT: fcvtn v0.2s, v0.2d
+; CHECK-NEXT: fcvtn2 v0.4s, v1.2d
+; CHECK-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-NEXT: ret
%1 = sitofp <4 x i64> %a to <4 x half>
ret <4 x half> %1
}
define <4 x half> @uitofp_i8(<4 x i8> %a) #0 {
-; CHECK-CVT-LABEL: uitofp_i8:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: bic v0.4h, #255, lsl #8
-; CHECK-CVT-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-CVT-NEXT: ucvtf v0.4s, v0.4s
-; CHECK-CVT-NEXT: fcvtn v0.4h, v0.4s
-; CHECK-CVT-NEXT: ret
+; CHECK-CVT-SD-LABEL: uitofp_i8:
+; CHECK-CVT-SD: // %bb.0:
+; CHECK-CVT-SD-NEXT: bic v0.4h, #255, lsl #8
+; CHECK-CVT-SD-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-CVT-SD-NEXT: ucvtf v0.4s, v0.4s
+; CHECK-CVT-SD-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-CVT-SD-NEXT: ret
;
-; CHECK-FP16-LABEL: uitofp_i8:
-; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: bic v0.4h, #255, lsl #8
-; CHECK-FP16-NEXT: ucvtf v0.4h, v0.4h
-; CHECK-FP16-NEXT: ret
+; CHECK-FP16-SD-LABEL: uitofp_i8:
+; CHECK-FP16-SD: // %bb.0:
+; CHECK-FP16-SD-NEXT: bic v0.4h, #255, lsl #8
+; CHECK-FP16-SD-NEXT: ucvtf v0.4h, v0.4h
+; CHECK-FP16-SD-NEXT: ret
+;
+; CHECK-CVT-GI-LABEL: uitofp_i8:
+; CHECK-CVT-GI: // %bb.0:
+; CHECK-CVT-GI-NEXT: movi v1.2d, #0x0000ff000000ff
+; CHECK-CVT-GI-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-CVT-GI-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-CVT-GI-NEXT: ucvtf v0.4s, v0.4s
+; CHECK-CVT-GI-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-CVT-GI-NEXT: ret
+;
+; CHECK-FP16-GI-LABEL: uitofp_i8:
+; CHECK-FP16-GI: // %bb.0:
+; CHECK-FP16-GI-NEXT: movi d1, #0xff00ff00ff00ff
+; CHECK-FP16-GI-NEXT: and v0.8b, v0.8b, v1.8b
+; CHECK-FP16-GI-NEXT: ucvtf v0.4h, v0.4h
+; CHECK-FP16-GI-NEXT: ret
%1 = uitofp <4 x i8> %a to <4 x half>
ret <4 x half> %1
}
@@ -264,35 +411,35 @@ define <4 x half> @uitofp_i16(<4 x i16> %a) #0 {
define <4 x half> @uitofp_i32(<4 x i32> %a) #0 {
-; CHECK-COMMON-LABEL: uitofp_i32:
-; CHECK-COMMON: // %bb.0:
-; CHECK-COMMON-NEXT: ucvtf v0.4s, v0.4s
-; CHECK-COMMON-NEXT: fcvtn v0.4h, v0.4s
-; CHECK-COMMON-NEXT: ret
+; CHECK-LABEL: uitofp_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ucvtf v0.4s, v0.4s
+; CHECK-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-NEXT: ret
%1 = uitofp <4 x i32> %a to <4 x half>
ret <4 x half> %1
}
define <4 x half> @uitofp_i64(<4 x i64> %a) #0 {
-; CHECK-COMMON-LABEL: uitofp_i64:
-; CHECK-COMMON: // %bb.0:
-; CHECK-COMMON-NEXT: ucvtf v0.2d, v0.2d
-; CHECK-COMMON-NEXT: ucvtf v1.2d, v1.2d
-; CHECK-COMMON-NEXT: fcvtn v0.2s, v0.2d
-; CHECK-COMMON-NEXT: fcvtn2 v0.4s, v1.2d
-; CHECK-COMMON-NEXT: fcvtn v0.4h, v0.4s
-; CHECK-COMMON-NEXT: ret
+; CHECK-LABEL: uitofp_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ucvtf v0.2d, v0.2d
+; CHECK-NEXT: ucvtf v1.2d, v1.2d
+; CHECK-NEXT: fcvtn v0.2s, v0.2d
+; CHECK-NEXT: fcvtn2 v0.4s, v1.2d
+; CHECK-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-NEXT: ret
%1 = uitofp <4 x i64> %a to <4 x half>
ret <4 x half> %1
}
define void @test_insert_at_zero(half %a, ptr %b) #0 {
-; CHECK-COMMON-LABEL: test_insert_at_zero:
-; CHECK-COMMON: // %bb.0:
-; CHECK-COMMON-NEXT: // kill: def $h0 killed $h0 def $d0
-; CHECK-COMMON-NEXT: str d0, [x0]
-; CHECK-COMMON-NEXT: ret
+; CHECK-LABEL: test_insert_at_zero:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $h0 killed $h0 def $d0
+; CHECK-NEXT: str d0, [x0]
+; CHECK-NEXT: ret
%1 = insertelement <4 x half> undef, half %a, i64 0
store <4 x half> %1, ptr %b, align 4
ret void
@@ -331,17 +478,29 @@ define <4 x i16> @fptosi_i16(<4 x half> %a) #0 {
}
define <4 x i8> @fptoui_i8(<4 x half> %a) #0 {
-; CHECK-CVT-LABEL: fptoui_i8:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: fcvtzs v0.4s, v0.4s
-; CHECK-CVT-NEXT: xtn v0.4h, v0.4s
-; CHECK-CVT-NEXT: ret
+; CHECK-CVT-SD-LABEL: fptoui_i8:
+; CHECK-CVT-SD: // %bb.0:
+; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-SD-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-CVT-SD-NEXT: xtn v0.4h, v0.4s
+; CHECK-CVT-SD-NEXT: ret
;
-; CHECK-FP16-LABEL: fptoui_i8:
-; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: fcvtzs v0.4h, v0.4h
-; CHECK-FP16-NEXT: ret
+; CHECK-FP16-SD-LABEL: fptoui_i8:
+; CHECK-FP16-SD: // %bb.0:
+; CHECK-FP16-SD-NEXT: fcvtzs v0.4h, v0.4h
+; CHECK-FP16-SD-NEXT: ret
+;
+; CHECK-CVT-GI-LABEL: fptoui_i8:
+; CHECK-CVT-GI: // %bb.0:
+; CHECK-CVT-GI-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-GI-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-CVT-GI-NEXT: xtn v0.4h, v0.4s
+; CHECK-CVT-GI-NEXT: ret
+;
+; CHECK-FP16-GI-LABEL: fptoui_i8:
+; CHECK-FP16-GI: // %bb.0:
+; CHECK-FP16-GI-NEXT: fcvtzu v0.4h, v0.4h
+; CHECK-FP16-GI-NEXT: ret
; NOTE: SelectionDAG selects fcvtzs here because the xtn shaves the sign bit; GlobalISel selects fcvtzu.
%1 = fptoui<4 x half> %a to <4 x i8>
ret <4 x i8> %1
@@ -364,36 +523,45 @@ define <4 x i16> @fptoui_i16(<4 x half> %a) #0 {
}
define <4 x i1> @test_fcmp_une(<4 x half> %a, <4 x half> %b) #0 {
-; CHECK-CVT-LABEL: test_fcmp_une:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h
-; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: fcmeq v0.4s, v0.4s, v1.4s
-; CHECK-CVT-NEXT: mvn v0.16b, v0.16b
-; CHECK-CVT-NEXT: xtn v0.4h, v0.4s
-; CHECK-CVT-NEXT: ret
+; CHECK-CVT-SD-LABEL: test_fcmp_une:
+; CHECK-CVT-SD: // %bb.0:
+; CHECK-CVT-SD-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-SD-NEXT: fcmeq v0.4s, v0.4s, v1.4s
+; CHECK-CVT-SD-NEXT: mvn v0.16b, v0.16b
+; CHECK-CVT-SD-NEXT: xtn v0.4h, v0.4s
+; CHECK-CVT-SD-NEXT: ret
;
; CHECK-FP16-LABEL: test_fcmp_une:
; CHECK-FP16: // %bb.0:
; CHECK-FP16-NEXT: fcmeq v0.4h, v0.4h, v1.4h
; CHECK-FP16-NEXT: mvn v0.8b, v0.8b
; CHECK-FP16-NEXT: ret
+;
+; CHECK-CVT-GI-LABEL: test_fcmp_une:
+; CHECK-CVT-GI: // %bb.0:
+; CHECK-CVT-GI-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-GI-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-CVT-GI-NEXT: fcmeq v0.4s, v0.4s, v1.4s
+; CHECK-CVT-GI-NEXT: mvn v0.16b, v0.16b
+; CHECK-CVT-GI-NEXT: xtn v0.4h, v0.4s
+; CHECK-CVT-GI-NEXT: ret
%1 = fcmp une <4 x half> %a, %b
ret <4 x i1> %1
}
define <4 x i1> @test_fcmp_ueq(<4 x half> %a, <4 x half> %b) #0 {
-; CHECK-CVT-LABEL: test_fcmp_ueq:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h
-; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: fcmgt v2.4s, v0.4s, v1.4s
-; CHECK-CVT-NEXT: fcmgt v0.4s, v1.4s, v0.4s
-; CHECK-CVT-NEXT: orr v0.16b, v0.16b, v2.16b
-; CHECK-CVT-NEXT: xtn v0.4h, v0.4s
-; CHECK-CVT-NEXT: mvn v0.8b, v0.8b
-; CHECK-CVT-NEXT: ret
+; CHECK-CVT-SD-LABEL: test_fcmp_ueq:
+; CHECK-CVT-SD: // %bb.0:
+; CHECK-CVT-SD-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-SD-NEXT: fcmgt v2.4s, v0.4s, v1.4s
+; CHECK-CVT-SD-NEXT: fcmgt v0.4s, v1.4s, v0.4s
+; CHECK-CVT-SD-NEXT: orr v0.16b, v0.16b, v2.16b
+; CHECK-CVT-SD-NEXT: xtn v0.4h, v0.4s
+; CHECK-CVT-SD-NEXT: mvn v0.8b, v0.8b
+; CHECK-CVT-SD-NEXT: ret
;
; CHECK-FP16-LABEL: test_fcmp_ueq:
; CHECK-FP16: // %bb.0:
@@ -402,102 +570,149 @@ define <4 x i1> @test_fcmp_ueq(<4 x half> %a, <4 x half> %b) #0 {
; CHECK-FP16-NEXT: orr v0.8b, v0.8b, v2.8b
; CHECK-FP16-NEXT: mvn v0.8b, v0.8b
; CHECK-FP16-NEXT: ret
+;
+; CHECK-CVT-GI-LABEL: test_fcmp_ueq:
+; CHECK-CVT-GI: // %bb.0:
+; CHECK-CVT-GI-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-GI-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-CVT-GI-NEXT: fcmgt v2.4s, v0.4s, v1.4s
+; CHECK-CVT-GI-NEXT: fcmgt v0.4s, v1.4s, v0.4s
+; CHECK-CVT-GI-NEXT: orr v0.16b, v0.16b, v2.16b
+; CHECK-CVT-GI-NEXT: mvn v0.16b, v0.16b
+; CHECK-CVT-GI-NEXT: xtn v0.4h, v0.4s
+; CHECK-CVT-GI-NEXT: ret
%1 = fcmp ueq <4 x half> %a, %b
ret <4 x i1> %1
}
define <4 x i1> @test_fcmp_ugt(<4 x half> %a, <4 x half> %b) #0 {
-; CHECK-CVT-LABEL: test_fcmp_ugt:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h
-; CHECK-CVT-NEXT: fcmge v0.4s, v1.4s, v0.4s
-; CHECK-CVT-NEXT: xtn v0.4h, v0.4s
-; CHECK-CVT-NEXT: mvn v0.8b, v0.8b
-; CHECK-CVT-NEXT: ret
+; CHECK-CVT-SD-LABEL: test_fcmp_ugt:
+; CHECK-CVT-SD: // %bb.0:
+; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-SD-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-CVT-SD-NEXT: fcmge v0.4s, v1.4s, v0.4s
+; CHECK-CVT-SD-NEXT: xtn v0.4h, v0.4s
+; CHECK-CVT-SD-NEXT: mvn v0.8b, v0.8b
+; CHECK-CVT-SD-NEXT: ret
;
; CHECK-FP16-LABEL: test_fcmp_ugt:
; CHECK-FP16: // %bb.0:
; CHECK-FP16-NEXT: fcmge v0.4h, v1.4h, v0.4h
; CHECK-FP16-NEXT: mvn v0.8b, v0.8b
; CHECK-FP16-NEXT: ret
+;
+; CHECK-CVT-GI-LABEL: test_fcmp_ugt:
+; CHECK-CVT-GI: // %bb.0:
+; CHECK-CVT-GI-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-GI-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-CVT-GI-NEXT: fcmge v0.4s, v1.4s, v0.4s
+; CHECK-CVT-GI-NEXT: mvn v0.16b, v0.16b
+; CHECK-CVT-GI-NEXT: xtn v0.4h, v0.4s
+; CHECK-CVT-GI-NEXT: ret
%1 = fcmp ugt <4 x half> %a, %b
ret <4 x i1> %1
}
define <4 x i1> @test_fcmp_uge(<4 x half> %a, <4 x half> %b) #0 {
-; CHECK-CVT-LABEL: test_fcmp_uge:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h
-; CHECK-CVT-NEXT: fcmgt v0.4s, v1.4s, v0.4s
-; CHECK-CVT-NEXT: xtn v0.4h, v0.4s
-; CHECK-CVT-NEXT: mvn v0.8b, v0.8b
-; CHECK-CVT-NEXT: ret
+; CHECK-CVT-SD-LABEL: test_fcmp_uge:
+; CHECK-CVT-SD: // %bb.0:
+; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-SD-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-CVT-SD-NEXT: fcmgt v0.4s, v1.4s, v0.4s
+; CHECK-CVT-SD-NEXT: xtn v0.4h, v0.4s
+; CHECK-CVT-SD-NEXT: mvn v0.8b, v0.8b
+; CHECK-CVT-SD-NEXT: ret
;
; CHECK-FP16-LABEL: test_fcmp_uge:
; CHECK-FP16: // %bb.0:
; CHECK-FP16-NEXT: fcmgt v0.4h, v1.4h, v0.4h
; CHECK-FP16-NEXT: mvn v0.8b, v0.8b
; CHECK-FP16-NEXT: ret
+;
+; CHECK-CVT-GI-LABEL: test_fcmp_uge:
+; CHECK-CVT-GI: // %bb.0:
+; CHECK-CVT-GI-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-GI-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-CVT-GI-NEXT: fcmgt v0.4s, v1.4s, v0.4s
+; CHECK-CVT-GI-NEXT: mvn v0.16b, v0.16b
+; CHECK-CVT-GI-NEXT: xtn v0.4h, v0.4s
+; CHECK-CVT-GI-NEXT: ret
%1 = fcmp uge <4 x half> %a, %b
ret <4 x i1> %1
}
define <4 x i1> @test_fcmp_ult(<4 x half> %a, <4 x half> %b) #0 {
-; CHECK-CVT-LABEL: test_fcmp_ult:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h
-; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: fcmge v0.4s, v0.4s, v1.4s
-; CHECK-CVT-NEXT: xtn v0.4h, v0.4s
-; CHECK-CVT-NEXT: mvn v0.8b, v0.8b
-; CHECK-CVT-NEXT: ret
+; CHECK-CVT-SD-LABEL: test_fcmp_ult:
+; CHECK-CVT-SD: // %bb.0:
+; CHECK-CVT-SD-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-SD-NEXT: fcmge v0.4s, v0.4s, v1.4s
+; CHECK-CVT-SD-NEXT: xtn v0.4h, v0.4s
+; CHECK-CVT-SD-NEXT: mvn v0.8b, v0.8b
+; CHECK-CVT-SD-NEXT: ret
;
; CHECK-FP16-LABEL: test_fcmp_ult:
; CHECK-FP16: // %bb.0:
; CHECK-FP16-NEXT: fcmge v0.4h, v0.4h, v1.4h
; CHECK-FP16-NEXT: mvn v0.8b, v0.8b
; CHECK-FP16-NEXT: ret
+;
+; CHECK-CVT-GI-LABEL: test_fcmp_ult:
+; CHECK-CVT-GI: // %bb.0:
+; CHECK-CVT-GI-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-GI-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-CVT-GI-NEXT: fcmge v0.4s, v0.4s, v1.4s
+; CHECK-CVT-GI-NEXT: mvn v0.16b, v0.16b
+; CHECK-CVT-GI-NEXT: xtn v0.4h, v0.4s
+; CHECK-CVT-GI-NEXT: ret
%1 = fcmp ult <4 x half> %a, %b
ret <4 x i1> %1
}
define <4 x i1> @test_fcmp_ule(<4 x half> %a, <4 x half> %b) #0 {
-; CHECK-CVT-LABEL: test_fcmp_ule:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h
-; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: fcmgt v0.4s, v0.4s, v1.4s
-; CHECK-CVT-NEXT: xtn v0.4h, v0.4s
-; CHECK-CVT-NEXT: mvn v0.8b, v0.8b
-; CHECK-CVT-NEXT: ret
+; CHECK-CVT-SD-LABEL: test_fcmp_ule:
+; CHECK-CVT-SD: // %bb.0:
+; CHECK-CVT-SD-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-SD-NEXT: fcmgt v0.4s, v0.4s, v1.4s
+; CHECK-CVT-SD-NEXT: xtn v0.4h, v0.4s
+; CHECK-CVT-SD-NEXT: mvn v0.8b, v0.8b
+; CHECK-CVT-SD-NEXT: ret
;
; CHECK-FP16-LABEL: test_fcmp_ule:
; CHECK-FP16: // %bb.0:
; CHECK-FP16-NEXT: fcmgt v0.4h, v0.4h, v1.4h
; CHECK-FP16-NEXT: mvn v0.8b, v0.8b
; CHECK-FP16-NEXT: ret
+;
+; CHECK-CVT-GI-LABEL: test_fcmp_ule:
+; CHECK-CVT-GI: // %bb.0:
+; CHECK-CVT-GI-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-GI-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-CVT-GI-NEXT: fcmgt v0.4s, v0.4s, v1.4s
+; CHECK-CVT-GI-NEXT: mvn v0.16b, v0.16b
+; CHECK-CVT-GI-NEXT: xtn v0.4h, v0.4s
+; CHECK-CVT-GI-NEXT: ret
%1 = fcmp ule <4 x half> %a, %b
ret <4 x i1> %1
}
define <4 x i1> @test_fcmp_uno(<4 x half> %a, <4 x half> %b) #0 {
-; CHECK-CVT-LABEL: test_fcmp_uno:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h
-; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: fcmge v2.4s, v0.4s, v1.4s
-; CHECK-CVT-NEXT: fcmgt v0.4s, v1.4s, v0.4s
-; CHECK-CVT-NEXT: orr v0.16b, v0.16b, v2.16b
-; CHECK-CVT-NEXT: xtn v0.4h, v0.4s
-; CHECK-CVT-NEXT: mvn v0.8b, v0.8b
-; CHECK-CVT-NEXT: ret
+; CHECK-CVT-SD-LABEL: test_fcmp_uno:
+; CHECK-CVT-SD: // %bb.0:
+; CHECK-CVT-SD-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-SD-NEXT: fcmge v2.4s, v0.4s, v1.4s
+; CHECK-CVT-SD-NEXT: fcmgt v0.4s, v1.4s, v0.4s
+; CHECK-CVT-SD-NEXT: orr v0.16b, v0.16b, v2.16b
+; CHECK-CVT-SD-NEXT: xtn v0.4h, v0.4s
+; CHECK-CVT-SD-NEXT: mvn v0.8b, v0.8b
+; CHECK-CVT-SD-NEXT: ret
;
; CHECK-FP16-LABEL: test_fcmp_uno:
; CHECK-FP16: // %bb.0:
@@ -506,21 +721,32 @@ define <4 x i1> @test_fcmp_uno(<4 x half> %a, <4 x half> %b) #0 {
; CHECK-FP16-NEXT: orr v0.8b, v0.8b, v2.8b
; CHECK-FP16-NEXT: mvn v0.8b, v0.8b
; CHECK-FP16-NEXT: ret
+;
+; CHECK-CVT-GI-LABEL: test_fcmp_uno:
+; CHECK-CVT-GI: // %bb.0:
+; CHECK-CVT-GI-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-GI-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-CVT-GI-NEXT: fcmge v2.4s, v0.4s, v1.4s
+; CHECK-CVT-GI-NEXT: fcmgt v0.4s, v1.4s, v0.4s
+; CHECK-CVT-GI-NEXT: orr v0.16b, v0.16b, v2.16b
+; CHECK-CVT-GI-NEXT: mvn v0.16b, v0.16b
+; CHECK-CVT-GI-NEXT: xtn v0.4h, v0.4s
+; CHECK-CVT-GI-NEXT: ret
%1 = fcmp uno <4 x half> %a, %b
ret <4 x i1> %1
}
define <4 x i1> @test_fcmp_one(<4 x half> %a, <4 x half> %b) #0 {
-; CHECK-CVT-LABEL: test_fcmp_one:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h
-; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: fcmgt v2.4s, v0.4s, v1.4s
-; CHECK-CVT-NEXT: fcmgt v0.4s, v1.4s, v0.4s
-; CHECK-CVT-NEXT: orr v0.16b, v0.16b, v2.16b
-; CHECK-CVT-NEXT: xtn v0.4h, v0.4s
-; CHECK-CVT-NEXT: ret
+; CHECK-CVT-SD-LABEL: test_fcmp_one:
+; CHECK-CVT-SD: // %bb.0:
+; CHECK-CVT-SD-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-SD-NEXT: fcmgt v2.4s, v0.4s, v1.4s
+; CHECK-CVT-SD-NEXT: fcmgt v0.4s, v1.4s, v0.4s
+; CHECK-CVT-SD-NEXT: orr v0.16b, v0.16b, v2.16b
+; CHECK-CVT-SD-NEXT: xtn v0.4h, v0.4s
+; CHECK-CVT-SD-NEXT: ret
;
; CHECK-FP16-LABEL: test_fcmp_one:
; CHECK-FP16: // %bb.0:
@@ -528,60 +754,94 @@ define <4 x i1> @test_fcmp_one(<4 x half> %a, <4 x half> %b) #0 {
; CHECK-FP16-NEXT: fcmgt v0.4h, v1.4h, v0.4h
; CHECK-FP16-NEXT: orr v0.8b, v0.8b, v2.8b
; CHECK-FP16-NEXT: ret
+;
+; CHECK-CVT-GI-LABEL: test_fcmp_one:
+; CHECK-CVT-GI: // %bb.0:
+; CHECK-CVT-GI-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-GI-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-CVT-GI-NEXT: fcmgt v2.4s, v0.4s, v1.4s
+; CHECK-CVT-GI-NEXT: fcmgt v0.4s, v1.4s, v0.4s
+; CHECK-CVT-GI-NEXT: orr v0.16b, v0.16b, v2.16b
+; CHECK-CVT-GI-NEXT: xtn v0.4h, v0.4s
+; CHECK-CVT-GI-NEXT: ret
%1 = fcmp one <4 x half> %a, %b
ret <4 x i1> %1
}
define <4 x i1> @test_fcmp_oeq(<4 x half> %a, <4 x half> %b) #0 {
-; CHECK-CVT-LABEL: test_fcmp_oeq:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h
-; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: fcmeq v0.4s, v0.4s, v1.4s
-; CHECK-CVT-NEXT: xtn v0.4h, v0.4s
-; CHECK-CVT-NEXT: ret
+; CHECK-CVT-SD-LABEL: test_fcmp_oeq:
+; CHECK-CVT-SD: // %bb.0:
+; CHECK-CVT-SD-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-SD-NEXT: fcmeq v0.4s, v0.4s, v1.4s
+; CHECK-CVT-SD-NEXT: xtn v0.4h, v0.4s
+; CHECK-CVT-SD-NEXT: ret
;
; CHECK-FP16-LABEL: test_fcmp_oeq:
; CHECK-FP16: // %bb.0:
; CHECK-FP16-NEXT: fcmeq v0.4h, v0.4h, v1.4h
; CHECK-FP16-NEXT: ret
+;
+; CHECK-CVT-GI-LABEL: test_fcmp_oeq:
+; CHECK-CVT-GI: // %bb.0:
+; CHECK-CVT-GI-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-GI-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-CVT-GI-NEXT: fcmeq v0.4s, v0.4s, v1.4s
+; CHECK-CVT-GI-NEXT: xtn v0.4h, v0.4s
+; CHECK-CVT-GI-NEXT: ret
%1 = fcmp oeq <4 x half> %a, %b
ret <4 x i1> %1
}
define <4 x i1> @test_fcmp_ogt(<4 x half> %a, <4 x half> %b) #0 {
-; CHECK-CVT-LABEL: test_fcmp_ogt:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h
-; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: fcmgt v0.4s, v0.4s, v1.4s
-; CHECK-CVT-NEXT: xtn v0.4h, v0.4s
-; CHECK-CVT-NEXT: ret
+; CHECK-CVT-SD-LABEL: test_fcmp_ogt:
+; CHECK-CVT-SD: // %bb.0:
+; CHECK-CVT-SD-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-SD-NEXT: fcmgt v0.4s, v0.4s, v1.4s
+; CHECK-CVT-SD-NEXT: xtn v0.4h, v0.4s
+; CHECK-CVT-SD-NEXT: ret
;
; CHECK-FP16-LABEL: test_fcmp_ogt:
; CHECK-FP16: // %bb.0:
; CHECK-FP16-NEXT: fcmgt v0.4h, v0.4h, v1.4h
; CHECK-FP16-NEXT: ret
+;
+; CHECK-CVT-GI-LABEL: test_fcmp_ogt:
+; CHECK-CVT-GI: // %bb.0:
+; CHECK-CVT-GI-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-GI-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-CVT-GI-NEXT: fcmgt v0.4s, v0.4s, v1.4s
+; CHECK-CVT-GI-NEXT: xtn v0.4h, v0.4s
+; CHECK-CVT-GI-NEXT: ret
%1 = fcmp ogt <4 x half> %a, %b
ret <4 x i1> %1
}
define <4 x i1> @test_fcmp_oge(<4 x half> %a, <4 x half> %b) #0 {
-; CHECK-CVT-LABEL: test_fcmp_oge:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h
-; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: fcmge v0.4s, v0.4s, v1.4s
-; CHECK-CVT-NEXT: xtn v0.4h, v0.4s
-; CHECK-CVT-NEXT: ret
+; CHECK-CVT-SD-LABEL: test_fcmp_oge:
+; CHECK-CVT-SD: // %bb.0:
+; CHECK-CVT-SD-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-SD-NEXT: fcmge v0.4s, v0.4s, v1.4s
+; CHECK-CVT-SD-NEXT: xtn v0.4h, v0.4s
+; CHECK-CVT-SD-NEXT: ret
;
; CHECK-FP16-LABEL: test_fcmp_oge:
; CHECK-FP16: // %bb.0:
; CHECK-FP16-NEXT: fcmge v0.4h, v0.4h, v1.4h
; CHECK-FP16-NEXT: ret
+;
+; CHECK-CVT-GI-LABEL: test_fcmp_oge:
+; CHECK-CVT-GI: // %bb.0:
+; CHECK-CVT-GI-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-GI-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-CVT-GI-NEXT: fcmge v0.4s, v0.4s, v1.4s
+; CHECK-CVT-GI-NEXT: xtn v0.4h, v0.4s
+; CHECK-CVT-GI-NEXT: ret
%1 = fcmp oge <4 x half> %a, %b
ret <4 x i1> %1
@@ -624,15 +884,15 @@ define <4 x i1> @test_fcmp_ole(<4 x half> %a, <4 x half> %b) #0 {
}
define <4 x i1> @test_fcmp_ord(<4 x half> %a, <4 x half> %b) #0 {
-; CHECK-CVT-LABEL: test_fcmp_ord:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h
-; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: fcmge v2.4s, v0.4s, v1.4s
-; CHECK-CVT-NEXT: fcmgt v0.4s, v1.4s, v0.4s
-; CHECK-CVT-NEXT: orr v0.16b, v0.16b, v2.16b
-; CHECK-CVT-NEXT: xtn v0.4h, v0.4s
-; CHECK-CVT-NEXT: ret
+; CHECK-CVT-SD-LABEL: test_fcmp_ord:
+; CHECK-CVT-SD: // %bb.0:
+; CHECK-CVT-SD-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-SD-NEXT: fcmge v2.4s, v0.4s, v1.4s
+; CHECK-CVT-SD-NEXT: fcmgt v0.4s, v1.4s, v0.4s
+; CHECK-CVT-SD-NEXT: orr v0.16b, v0.16b, v2.16b
+; CHECK-CVT-SD-NEXT: xtn v0.4h, v0.4s
+; CHECK-CVT-SD-NEXT: ret
;
; CHECK-FP16-LABEL: test_fcmp_ord:
; CHECK-FP16: // %bb.0:
@@ -640,6 +900,16 @@ define <4 x i1> @test_fcmp_ord(<4 x half> %a, <4 x half> %b) #0 {
; CHECK-FP16-NEXT: fcmgt v0.4h, v1.4h, v0.4h
; CHECK-FP16-NEXT: orr v0.8b, v0.8b, v2.8b
; CHECK-FP16-NEXT: ret
+;
+; CHECK-CVT-GI-LABEL: test_fcmp_ord:
+; CHECK-CVT-GI: // %bb.0:
+; CHECK-CVT-GI-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-GI-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-CVT-GI-NEXT: fcmge v2.4s, v0.4s, v1.4s
+; CHECK-CVT-GI-NEXT: fcmgt v0.4s, v1.4s, v0.4s
+; CHECK-CVT-GI-NEXT: orr v0.16b, v0.16b, v2.16b
+; CHECK-CVT-GI-NEXT: xtn v0.4h, v0.4s
+; CHECK-CVT-GI-NEXT: ret
%1 = fcmp ord <4 x half> %a, %b
ret <4 x i1> %1
diff --git a/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll b/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll
index fcb42a74ce697..86763eb5f9e3b 100644
--- a/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll
@@ -1,24 +1,38 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=aarch64 -mattr=-fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-CVT
-; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
+; RUN: llc < %s -mtriple=aarch64 -mattr=-fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-CVT,CHECK-CVT-SD
+; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-FP16,CHECK-FP16-SD
+; RUN: llc < %s -mtriple=aarch64 -mattr=-fullfp16 -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-CVT,CHECK-CVT-GI
+; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-FP16,CHECK-FP16-GI
define <8 x half> @add_h(<8 x half> %a, <8 x half> %b) {
-; CHECK-CVT-LABEL: add_h:
-; CHECK-CVT: // %bb.0: // %entry
-; CHECK-CVT-NEXT: fcvtl v2.4s, v1.4h
-; CHECK-CVT-NEXT: fcvtl v3.4s, v0.4h
-; CHECK-CVT-NEXT: fcvtl2 v1.4s, v1.8h
-; CHECK-CVT-NEXT: fcvtl2 v0.4s, v0.8h
-; CHECK-CVT-NEXT: fadd v2.4s, v3.4s, v2.4s
-; CHECK-CVT-NEXT: fadd v1.4s, v0.4s, v1.4s
-; CHECK-CVT-NEXT: fcvtn v0.4h, v2.4s
-; CHECK-CVT-NEXT: fcvtn2 v0.8h, v1.4s
-; CHECK-CVT-NEXT: ret
+; CHECK-CVT-SD-LABEL: add_h:
+; CHECK-CVT-SD: // %bb.0: // %entry
+; CHECK-CVT-SD-NEXT: fcvtl v2.4s, v1.4h
+; CHECK-CVT-SD-NEXT: fcvtl v3.4s, v0.4h
+; CHECK-CVT-SD-NEXT: fcvtl2 v1.4s, v1.8h
+; CHECK-CVT-SD-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-CVT-SD-NEXT: fadd v2.4s, v3.4s, v2.4s
+; CHECK-CVT-SD-NEXT: fadd v1.4s, v0.4s, v1.4s
+; CHECK-CVT-SD-NEXT: fcvtn v0.4h, v2.4s
+; CHECK-CVT-SD-NEXT: fcvtn2 v0.8h, v1.4s
+; CHECK-CVT-SD-NEXT: ret
;
; CHECK-FP16-LABEL: add_h:
; CHECK-FP16: // %bb.0: // %entry
; CHECK-FP16-NEXT: fadd v0.8h, v0.8h, v1.8h
; CHECK-FP16-NEXT: ret
+;
+; CHECK-CVT-GI-LABEL: add_h:
+; CHECK-CVT-GI: // %bb.0: // %entry
+; CHECK-CVT-GI-NEXT: fcvtl v2.4s, v0.4h
+; CHECK-CVT-GI-NEXT: fcvtl v3.4s, v1.4h
+; CHECK-CVT-GI-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-CVT-GI-NEXT: fcvtl2 v1.4s, v1.8h
+; CHECK-CVT-GI-NEXT: fadd v2.4s, v2.4s, v3.4s
+; CHECK-CVT-GI-NEXT: fadd v1.4s, v0.4s, v1.4s
+; CHECK-CVT-GI-NEXT: fcvtn v0.4h, v2.4s
+; CHECK-CVT-GI-NEXT: fcvtn2 v0.8h, v1.4s
+; CHECK-CVT-GI-NEXT: ret
entry:
%0 = fadd <8 x half> %a, %b
ret <8 x half> %0
@@ -26,22 +40,34 @@ entry:
define <8 x half> @sub_h(<8 x half> %a, <8 x half> %b) {
-; CHECK-CVT-LABEL: sub_h:
-; CHECK-CVT: // %bb.0: // %entry
-; CHECK-CVT-NEXT: fcvtl v2.4s, v1.4h
-; CHECK-CVT-NEXT: fcvtl v3.4s, v0.4h
-; CHECK-CVT-NEXT: fcvtl2 v1.4s, v1.8h
-; CHECK-CVT-NEXT: fcvtl2 v0.4s, v0.8h
-; CHECK-CVT-NEXT: fsub v2.4s, v3.4s, v2.4s
-; CHECK-CVT-NEXT: fsub v1.4s, v0.4s, v1.4s
-; CHECK-CVT-NEXT: fcvtn v0.4h, v2.4s
-; CHECK-CVT-NEXT: fcvtn2 v0.8h, v1.4s
-; CHECK-CVT-NEXT: ret
+; CHECK-CVT-SD-LABEL: sub_h:
+; CHECK-CVT-SD: // %bb.0: // %entry
+; CHECK-CVT-SD-NEXT: fcvtl v2.4s, v1.4h
+; CHECK-CVT-SD-NEXT: fcvtl v3.4s, v0.4h
+; CHECK-CVT-SD-NEXT: fcvtl2 v1.4s, v1.8h
+; CHECK-CVT-SD-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-CVT-SD-NEXT: fsub v2.4s, v3.4s, v2.4s
+; CHECK-CVT-SD-NEXT: fsub v1.4s, v0.4s, v1.4s
+; CHECK-CVT-SD-NEXT: fcvtn v0.4h, v2.4s
+; CHECK-CVT-SD-NEXT: fcvtn2 v0.8h, v1.4s
+; CHECK-CVT-SD-NEXT: ret
;
; CHECK-FP16-LABEL: sub_h:
; CHECK-FP16: // %bb.0: // %entry
; CHECK-FP16-NEXT: fsub v0.8h, v0.8h, v1.8h
; CHECK-FP16-NEXT: ret
+;
+; CHECK-CVT-GI-LABEL: sub_h:
+; CHECK-CVT-GI: // %bb.0: // %entry
+; CHECK-CVT-GI-NEXT: fcvtl v2.4s, v0.4h
+; CHECK-CVT-GI-NEXT: fcvtl v3.4s, v1.4h
+; CHECK-CVT-GI-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-CVT-GI-NEXT: fcvtl2 v1.4s, v1.8h
+; CHECK-CVT-GI-NEXT: fsub v2.4s, v2.4s, v3.4s
+; CHECK-CVT-GI-NEXT: fsub v1.4s, v0.4s, v1.4s
+; CHECK-CVT-GI-NEXT: fcvtn v0.4h, v2.4s
+; CHECK-CVT-GI-NEXT: fcvtn2 v0.8h, v1.4s
+; CHECK-CVT-GI-NEXT: ret
entry:
%0 = fsub <8 x half> %a, %b
ret <8 x half> %0
@@ -49,22 +75,34 @@ entry:
define <8 x half> @mul_h(<8 x half> %a, <8 x half> %b) {
-; CHECK-CVT-LABEL: mul_h:
-; CHECK-CVT: // %bb.0: // %entry
-; CHECK-CVT-NEXT: fcvtl v2.4s, v1.4h
-; CHECK-CVT-NEXT: fcvtl v3.4s, v0.4h
-; CHECK-CVT-NEXT: fcvtl2 v1.4s, v1.8h
-; CHECK-CVT-NEXT: fcvtl2 v0.4s, v0.8h
-; CHECK-CVT-NEXT: fmul v2.4s, v3.4s, v2.4s
-; CHECK-CVT-NEXT: fmul v1.4s, v0.4s, v1.4s
-; CHECK-CVT-NEXT: fcvtn v0.4h, v2.4s
-; CHECK-CVT-NEXT: fcvtn2 v0.8h, v1.4s
-; CHECK-CVT-NEXT: ret
+; CHECK-CVT-SD-LABEL: mul_h:
+; CHECK-CVT-SD: // %bb.0: // %entry
+; CHECK-CVT-SD-NEXT: fcvtl v2.4s, v1.4h
+; CHECK-CVT-SD-NEXT: fcvtl v3.4s, v0.4h
+; CHECK-CVT-SD-NEXT: fcvtl2 v1.4s, v1.8h
+; CHECK-CVT-SD-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-CVT-SD-NEXT: fmul v2.4s, v3.4s, v2.4s
+; CHECK-CVT-SD-NEXT: fmul v1.4s, v0.4s, v1.4s
+; CHECK-CVT-SD-NEXT: fcvtn v0.4h, v2.4s
+; CHECK-CVT-SD-NEXT: fcvtn2 v0.8h, v1.4s
+; CHECK-CVT-SD-NEXT: ret
;
; CHECK-FP16-LABEL: mul_h:
; CHECK-FP16: // %bb.0: // %entry
; CHECK-FP16-NEXT: fmul v0.8h, v0.8h, v1.8h
; CHECK-FP16-NEXT: ret
+;
+; CHECK-CVT-GI-LABEL: mul_h:
+; CHECK-CVT-GI: // %bb.0: // %entry
+; CHECK-CVT-GI-NEXT: fcvtl v2.4s, v0.4h
+; CHECK-CVT-GI-NEXT: fcvtl v3.4s, v1.4h
+; CHECK-CVT-GI-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-CVT-GI-NEXT: fcvtl2 v1.4s, v1.8h
+; CHECK-CVT-GI-NEXT: fmul v2.4s, v2.4s, v3.4s
+; CHECK-CVT-GI-NEXT: fmul v1.4s, v0.4s, v1.4s
+; CHECK-CVT-GI-NEXT: fcvtn v0.4h, v2.4s
+; CHECK-CVT-GI-NEXT: fcvtn2 v0.8h, v1.4s
+; CHECK-CVT-GI-NEXT: ret
entry:
%0 = fmul <8 x half> %a, %b
ret <8 x half> %0
@@ -72,22 +110,34 @@ entry:
define <8 x half> @div_h(<8 x half> %a, <8 x half> %b) {
-; CHECK-CVT-LABEL: div_h:
-; CHECK-CVT: // %bb.0: // %entry
-; CHECK-CVT-NEXT: fcvtl v2.4s, v1.4h
-; CHECK-CVT-NEXT: fcvtl v3.4s, v0.4h
-; CHECK-CVT-NEXT: fcvtl2 v1.4s, v1.8h
-; CHECK-CVT-NEXT: fcvtl2 v0.4s, v0.8h
-; CHECK-CVT-NEXT: fdiv v2.4s, v3.4s, v2.4s
-; CHECK-CVT-NEXT: fdiv v1.4s, v0.4s, v1.4s
-; CHECK-CVT-NEXT: fcvtn v0.4h, v2.4s
-; CHECK-CVT-NEXT: fcvtn2 v0.8h, v1.4s
-; CHECK-CVT-NEXT: ret
+; CHECK-CVT-SD-LABEL: div_h:
+; CHECK-CVT-SD: // %bb.0: // %entry
+; CHECK-CVT-SD-NEXT: fcvtl v2.4s, v1.4h
+; CHECK-CVT-SD-NEXT: fcvtl v3.4s, v0.4h
+; CHECK-CVT-SD-NEXT: fcvtl2 v1.4s, v1.8h
+; CHECK-CVT-SD-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-CVT-SD-NEXT: fdiv v2.4s, v3.4s, v2.4s
+; CHECK-CVT-SD-NEXT: fdiv v1.4s, v0.4s, v1.4s
+; CHECK-CVT-SD-NEXT: fcvtn v0.4h, v2.4s
+; CHECK-CVT-SD-NEXT: fcvtn2 v0.8h, v1.4s
+; CHECK-CVT-SD-NEXT: ret
;
; CHECK-FP16-LABEL: div_h:
; CHECK-FP16: // %bb.0: // %entry
; CHECK-FP16-NEXT: fdiv v0.8h, v0.8h, v1.8h
; CHECK-FP16-NEXT: ret
+;
+; CHECK-CVT-GI-LABEL: div_h:
+; CHECK-CVT-GI: // %bb.0: // %entry
+; CHECK-CVT-GI-NEXT: fcvtl v2.4s, v0.4h
+; CHECK-CVT-GI-NEXT: fcvtl v3.4s, v1.4h
+; CHECK-CVT-GI-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-CVT-GI-NEXT: fcvtl2 v1.4s, v1.8h
+; CHECK-CVT-GI-NEXT: fdiv v2.4s, v2.4s, v3.4s
+; CHECK-CVT-GI-NEXT: fdiv v1.4s, v0.4s, v1.4s
+; CHECK-CVT-GI-NEXT: fcvtn v0.4h, v2.4s
+; CHECK-CVT-GI-NEXT: fcvtn2 v0.8h, v1.4s
+; CHECK-CVT-GI-NEXT: ret
entry:
%0 = fdiv <8 x half> %a, %b
ret <8 x half> %0
@@ -126,39 +176,171 @@ define <8 x half> @s_to_h(<8 x float> %a) {
}
define <8 x half> @d_to_h(<8 x double> %a) {
-; CHECK-LABEL: d_to_h:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtxn v0.2s, v0.2d
-; CHECK-NEXT: fcvtxn v2.2s, v2.2d
-; CHECK-NEXT: fcvtxn2 v0.4s, v1.2d
-; CHECK-NEXT: fcvtxn2 v2.4s, v3.2d
-; CHECK-NEXT: fcvtn v0.4h, v0.4s
-; CHECK-NEXT: fcvtn2 v0.8h, v2.4s
-; CHECK-NEXT: ret
+; CHECK-CVT-SD-LABEL: d_to_h:
+; CHECK-CVT-SD: // %bb.0:
+; CHECK-CVT-SD-NEXT: fcvtxn v0.2s, v0.2d
+; CHECK-CVT-SD-NEXT: fcvtxn v2.2s, v2.2d
+; CHECK-CVT-SD-NEXT: fcvtxn2 v0.4s, v1.2d
+; CHECK-CVT-SD-NEXT: fcvtxn2 v2.4s, v3.2d
+; CHECK-CVT-SD-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-CVT-SD-NEXT: fcvtn2 v0.8h, v2.4s
+; CHECK-CVT-SD-NEXT: ret
+;
+; CHECK-FP16-SD-LABEL: d_to_h:
+; CHECK-FP16-SD: // %bb.0:
+; CHECK-FP16-SD-NEXT: fcvtxn v0.2s, v0.2d
+; CHECK-FP16-SD-NEXT: fcvtxn v2.2s, v2.2d
+; CHECK-FP16-SD-NEXT: fcvtxn2 v0.4s, v1.2d
+; CHECK-FP16-SD-NEXT: fcvtxn2 v2.4s, v3.2d
+; CHECK-FP16-SD-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-FP16-SD-NEXT: fcvtn2 v0.8h, v2.4s
+; CHECK-FP16-SD-NEXT: ret
+;
+; CHECK-CVT-GI-LABEL: d_to_h:
+; CHECK-CVT-GI: // %bb.0:
+; CHECK-CVT-GI-NEXT: mov d4, v0.d[1]
+; CHECK-CVT-GI-NEXT: fcvt h0, d0
+; CHECK-CVT-GI-NEXT: mov d5, v1.d[1]
+; CHECK-CVT-GI-NEXT: fcvt h1, d1
+; CHECK-CVT-GI-NEXT: fcvt h4, d4
+; CHECK-CVT-GI-NEXT: mov v0.h[1], v4.h[0]
+; CHECK-CVT-GI-NEXT: fcvt h4, d5
+; CHECK-CVT-GI-NEXT: mov v0.h[2], v1.h[0]
+; CHECK-CVT-GI-NEXT: mov d1, v2.d[1]
+; CHECK-CVT-GI-NEXT: fcvt h2, d2
+; CHECK-CVT-GI-NEXT: mov v0.h[3], v4.h[0]
+; CHECK-CVT-GI-NEXT: fcvt h1, d1
+; CHECK-CVT-GI-NEXT: mov v0.h[4], v2.h[0]
+; CHECK-CVT-GI-NEXT: mov d2, v3.d[1]
+; CHECK-CVT-GI-NEXT: fcvt h3, d3
+; CHECK-CVT-GI-NEXT: mov v0.h[5], v1.h[0]
+; CHECK-CVT-GI-NEXT: fcvt h1, d2
+; CHECK-CVT-GI-NEXT: mov v0.h[6], v3.h[0]
+; CHECK-CVT-GI-NEXT: mov v0.h[7], v1.h[0]
+; CHECK-CVT-GI-NEXT: ret
+;
+; CHECK-FP16-GI-LABEL: d_to_h:
+; CHECK-FP16-GI: // %bb.0:
+; CHECK-FP16-GI-NEXT: mov d4, v0.d[1]
+; CHECK-FP16-GI-NEXT: fcvt h0, d0
+; CHECK-FP16-GI-NEXT: mov d5, v1.d[1]
+; CHECK-FP16-GI-NEXT: fcvt h1, d1
+; CHECK-FP16-GI-NEXT: fcvt h4, d4
+; CHECK-FP16-GI-NEXT: mov v0.h[1], v4.h[0]
+; CHECK-FP16-GI-NEXT: fcvt h4, d5
+; CHECK-FP16-GI-NEXT: mov v0.h[2], v1.h[0]
+; CHECK-FP16-GI-NEXT: mov d1, v2.d[1]
+; CHECK-FP16-GI-NEXT: fcvt h2, d2
+; CHECK-FP16-GI-NEXT: mov v0.h[3], v4.h[0]
+; CHECK-FP16-GI-NEXT: fcvt h1, d1
+; CHECK-FP16-GI-NEXT: mov v0.h[4], v2.h[0]
+; CHECK-FP16-GI-NEXT: mov d2, v3.d[1]
+; CHECK-FP16-GI-NEXT: fcvt h3, d3
+; CHECK-FP16-GI-NEXT: mov v0.h[5], v1.h[0]
+; CHECK-FP16-GI-NEXT: fcvt h1, d2
+; CHECK-FP16-GI-NEXT: mov v0.h[6], v3.h[0]
+; CHECK-FP16-GI-NEXT: mov v0.h[7], v1.h[0]
+; CHECK-FP16-GI-NEXT: ret
%1 = fptrunc <8 x double> %a to <8 x half>
ret <8 x half> %1
}
define <8 x float> @h_to_s(<8 x half> %a) {
-; CHECK-LABEL: h_to_s:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtl2 v1.4s, v0.8h
-; CHECK-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-NEXT: ret
+; CHECK-CVT-SD-LABEL: h_to_s:
+; CHECK-CVT-SD: // %bb.0:
+; CHECK-CVT-SD-NEXT: fcvtl2 v1.4s, v0.8h
+; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-SD-NEXT: ret
+;
+; CHECK-FP16-SD-LABEL: h_to_s:
+; CHECK-FP16-SD: // %bb.0:
+; CHECK-FP16-SD-NEXT: fcvtl2 v1.4s, v0.8h
+; CHECK-FP16-SD-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-FP16-SD-NEXT: ret
+;
+; CHECK-CVT-GI-LABEL: h_to_s:
+; CHECK-CVT-GI: // %bb.0:
+; CHECK-CVT-GI-NEXT: fcvtl v2.4s, v0.4h
+; CHECK-CVT-GI-NEXT: fcvtl2 v1.4s, v0.8h
+; CHECK-CVT-GI-NEXT: mov v0.16b, v2.16b
+; CHECK-CVT-GI-NEXT: ret
+;
+; CHECK-FP16-GI-LABEL: h_to_s:
+; CHECK-FP16-GI: // %bb.0:
+; CHECK-FP16-GI-NEXT: fcvtl v2.4s, v0.4h
+; CHECK-FP16-GI-NEXT: fcvtl2 v1.4s, v0.8h
+; CHECK-FP16-GI-NEXT: mov v0.16b, v2.16b
+; CHECK-FP16-GI-NEXT: ret
%1 = fpext <8 x half> %a to <8 x float>
ret <8 x float> %1
}
define <8 x double> @h_to_d(<8 x half> %a) {
-; CHECK-LABEL: h_to_d:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtl v1.4s, v0.4h
-; CHECK-NEXT: fcvtl2 v2.4s, v0.8h
-; CHECK-NEXT: fcvtl v0.2d, v1.2s
-; CHECK-NEXT: fcvtl2 v3.2d, v2.4s
-; CHECK-NEXT: fcvtl2 v1.2d, v1.4s
-; CHECK-NEXT: fcvtl v2.2d, v2.2s
-; CHECK-NEXT: ret
+; CHECK-CVT-SD-LABEL: h_to_d:
+; CHECK-CVT-SD: // %bb.0:
+; CHECK-CVT-SD-NEXT: fcvtl v1.4s, v0.4h
+; CHECK-CVT-SD-NEXT: fcvtl2 v2.4s, v0.8h
+; CHECK-CVT-SD-NEXT: fcvtl v0.2d, v1.2s
+; CHECK-CVT-SD-NEXT: fcvtl2 v3.2d, v2.4s
+; CHECK-CVT-SD-NEXT: fcvtl2 v1.2d, v1.4s
+; CHECK-CVT-SD-NEXT: fcvtl v2.2d, v2.2s
+; CHECK-CVT-SD-NEXT: ret
+;
+; CHECK-FP16-SD-LABEL: h_to_d:
+; CHECK-FP16-SD: // %bb.0:
+; CHECK-FP16-SD-NEXT: fcvtl v1.4s, v0.4h
+; CHECK-FP16-SD-NEXT: fcvtl2 v2.4s, v0.8h
+; CHECK-FP16-SD-NEXT: fcvtl v0.2d, v1.2s
+; CHECK-FP16-SD-NEXT: fcvtl2 v3.2d, v2.4s
+; CHECK-FP16-SD-NEXT: fcvtl2 v1.2d, v1.4s
+; CHECK-FP16-SD-NEXT: fcvtl v2.2d, v2.2s
+; CHECK-FP16-SD-NEXT: ret
+;
+; CHECK-CVT-GI-LABEL: h_to_d:
+; CHECK-CVT-GI: // %bb.0:
+; CHECK-CVT-GI-NEXT: mov h1, v0.h[1]
+; CHECK-CVT-GI-NEXT: mov h2, v0.h[2]
+; CHECK-CVT-GI-NEXT: mov h3, v0.h[3]
+; CHECK-CVT-GI-NEXT: mov h4, v0.h[4]
+; CHECK-CVT-GI-NEXT: mov h5, v0.h[5]
+; CHECK-CVT-GI-NEXT: mov h6, v0.h[6]
+; CHECK-CVT-GI-NEXT: mov h7, v0.h[7]
+; CHECK-CVT-GI-NEXT: fcvt d0, h0
+; CHECK-CVT-GI-NEXT: fcvt d16, h1
+; CHECK-CVT-GI-NEXT: fcvt d1, h2
+; CHECK-CVT-GI-NEXT: fcvt d17, h3
+; CHECK-CVT-GI-NEXT: fcvt d2, h4
+; CHECK-CVT-GI-NEXT: fcvt d4, h5
+; CHECK-CVT-GI-NEXT: fcvt d3, h6
+; CHECK-CVT-GI-NEXT: fcvt d5, h7
+; CHECK-CVT-GI-NEXT: mov v0.d[1], v16.d[0]
+; CHECK-CVT-GI-NEXT: mov v1.d[1], v17.d[0]
+; CHECK-CVT-GI-NEXT: mov v2.d[1], v4.d[0]
+; CHECK-CVT-GI-NEXT: mov v3.d[1], v5.d[0]
+; CHECK-CVT-GI-NEXT: ret
+;
+; CHECK-FP16-GI-LABEL: h_to_d:
+; CHECK-FP16-GI: // %bb.0:
+; CHECK-FP16-GI-NEXT: mov h1, v0.h[1]
+; CHECK-FP16-GI-NEXT: mov h2, v0.h[2]
+; CHECK-FP16-GI-NEXT: mov h3, v0.h[3]
+; CHECK-FP16-GI-NEXT: mov h4, v0.h[4]
+; CHECK-FP16-GI-NEXT: mov h5, v0.h[5]
+; CHECK-FP16-GI-NEXT: mov h6, v0.h[6]
+; CHECK-FP16-GI-NEXT: mov h7, v0.h[7]
+; CHECK-FP16-GI-NEXT: fcvt d0, h0
+; CHECK-FP16-GI-NEXT: fcvt d16, h1
+; CHECK-FP16-GI-NEXT: fcvt d1, h2
+; CHECK-FP16-GI-NEXT: fcvt d17, h3
+; CHECK-FP16-GI-NEXT: fcvt d2, h4
+; CHECK-FP16-GI-NEXT: fcvt d4, h5
+; CHECK-FP16-GI-NEXT: fcvt d3, h6
+; CHECK-FP16-GI-NEXT: fcvt d5, h7
+; CHECK-FP16-GI-NEXT: mov v0.d[1], v16.d[0]
+; CHECK-FP16-GI-NEXT: mov v1.d[1], v17.d[0]
+; CHECK-FP16-GI-NEXT: mov v2.d[1], v4.d[0]
+; CHECK-FP16-GI-NEXT: mov v3.d[1], v5.d[0]
+; CHECK-FP16-GI-NEXT: ret
%1 = fpext <8 x half> %a to <8 x double>
ret <8 x double> %1
}
@@ -183,14 +365,14 @@ define <8 x i16> @bitcast_h_to_i(float, <8 x half> %a) {
}
define <4 x half> @sitofp_v4i8(<4 x i8> %a) #0 {
-; CHECK-CVT-LABEL: sitofp_v4i8:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: shl v0.4h, v0.4h, #8
-; CHECK-CVT-NEXT: sshr v0.4h, v0.4h, #8
-; CHECK-CVT-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-CVT-NEXT: scvtf v0.4s, v0.4s
-; CHECK-CVT-NEXT: fcvtn v0.4h, v0.4s
-; CHECK-CVT-NEXT: ret
+; CHECK-CVT-SD-LABEL: sitofp_v4i8:
+; CHECK-CVT-SD: // %bb.0:
+; CHECK-CVT-SD-NEXT: shl v0.4h, v0.4h, #8
+; CHECK-CVT-SD-NEXT: sshr v0.4h, v0.4h, #8
+; CHECK-CVT-SD-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-CVT-SD-NEXT: scvtf v0.4s, v0.4s
+; CHECK-CVT-SD-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-CVT-SD-NEXT: ret
;
; CHECK-FP16-LABEL: sitofp_v4i8:
; CHECK-FP16: // %bb.0:
@@ -198,76 +380,132 @@ define <4 x half> @sitofp_v4i8(<4 x i8> %a) #0 {
; CHECK-FP16-NEXT: sshr v0.4h, v0.4h, #8
; CHECK-FP16-NEXT: scvtf v0.4h, v0.4h
; CHECK-FP16-NEXT: ret
+;
+; CHECK-CVT-GI-LABEL: sitofp_v4i8:
+; CHECK-CVT-GI: // %bb.0:
+; CHECK-CVT-GI-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-CVT-GI-NEXT: shl v0.4s, v0.4s, #24
+; CHECK-CVT-GI-NEXT: sshr v0.4s, v0.4s, #24
+; CHECK-CVT-GI-NEXT: scvtf v0.4s, v0.4s
+; CHECK-CVT-GI-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-CVT-GI-NEXT: ret
%1 = sitofp <4 x i8> %a to <4 x half>
ret <4 x half> %1
}
define <8 x half> @sitofp_v8i8(<8 x i8> %a) #0 {
-; CHECK-CVT-LABEL: sitofp_v8i8:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-CVT-NEXT: sshll v1.4s, v0.4h, #0
-; CHECK-CVT-NEXT: sshll2 v2.4s, v0.8h, #0
-; CHECK-CVT-NEXT: scvtf v1.4s, v1.4s
-; CHECK-CVT-NEXT: fcvtn v0.4h, v1.4s
-; CHECK-CVT-NEXT: scvtf v1.4s, v2.4s
-; CHECK-CVT-NEXT: fcvtn2 v0.8h, v1.4s
-; CHECK-CVT-NEXT: ret
+; CHECK-CVT-SD-LABEL: sitofp_v8i8:
+; CHECK-CVT-SD: // %bb.0:
+; CHECK-CVT-SD-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-CVT-SD-NEXT: sshll v1.4s, v0.4h, #0
+; CHECK-CVT-SD-NEXT: sshll2 v2.4s, v0.8h, #0
+; CHECK-CVT-SD-NEXT: scvtf v1.4s, v1.4s
+; CHECK-CVT-SD-NEXT: fcvtn v0.4h, v1.4s
+; CHECK-CVT-SD-NEXT: scvtf v1.4s, v2.4s
+; CHECK-CVT-SD-NEXT: fcvtn2 v0.8h, v1.4s
+; CHECK-CVT-SD-NEXT: ret
;
; CHECK-FP16-LABEL: sitofp_v8i8:
; CHECK-FP16: // %bb.0:
; CHECK-FP16-NEXT: sshll v0.8h, v0.8b, #0
; CHECK-FP16-NEXT: scvtf v0.8h, v0.8h
; CHECK-FP16-NEXT: ret
+;
+; CHECK-CVT-GI-LABEL: sitofp_v8i8:
+; CHECK-CVT-GI: // %bb.0:
+; CHECK-CVT-GI-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-CVT-GI-NEXT: sshll v1.4s, v0.4h, #0
+; CHECK-CVT-GI-NEXT: sshll2 v0.4s, v0.8h, #0
+; CHECK-CVT-GI-NEXT: scvtf v1.4s, v1.4s
+; CHECK-CVT-GI-NEXT: scvtf v2.4s, v0.4s
+; CHECK-CVT-GI-NEXT: fcvtn v0.4h, v1.4s
+; CHECK-CVT-GI-NEXT: fcvtn2 v0.8h, v2.4s
+; CHECK-CVT-GI-NEXT: ret
%1 = sitofp <8 x i8> %a to <8 x half>
ret <8 x half> %1
}
define <16 x half> @sitofp_v16i8(<16 x i8> %a) #0 {
-; CHECK-CVT-LABEL: sitofp_v16i8:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: sshll2 v1.8h, v0.16b, #0
-; CHECK-CVT-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-CVT-NEXT: sshll v2.4s, v1.4h, #0
-; CHECK-CVT-NEXT: sshll v3.4s, v0.4h, #0
-; CHECK-CVT-NEXT: sshll2 v4.4s, v1.8h, #0
-; CHECK-CVT-NEXT: sshll2 v5.4s, v0.8h, #0
-; CHECK-CVT-NEXT: scvtf v2.4s, v2.4s
-; CHECK-CVT-NEXT: scvtf v3.4s, v3.4s
-; CHECK-CVT-NEXT: fcvtn v1.4h, v2.4s
-; CHECK-CVT-NEXT: scvtf v2.4s, v4.4s
-; CHECK-CVT-NEXT: fcvtn v0.4h, v3.4s
-; CHECK-CVT-NEXT: scvtf v3.4s, v5.4s
-; CHECK-CVT-NEXT: fcvtn2 v1.8h, v2.4s
-; CHECK-CVT-NEXT: fcvtn2 v0.8h, v3.4s
-; CHECK-CVT-NEXT: ret
-;
-; CHECK-FP16-LABEL: sitofp_v16i8:
-; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: sshll2 v1.8h, v0.16b, #0
-; CHECK-FP16-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-FP16-NEXT: scvtf v1.8h, v1.8h
-; CHECK-FP16-NEXT: scvtf v0.8h, v0.8h
-; CHECK-FP16-NEXT: ret
+; CHECK-CVT-SD-LABEL: sitofp_v16i8:
+; CHECK-CVT-SD: // %bb.0:
+; CHECK-CVT-SD-NEXT: sshll2 v1.8h, v0.16b, #0
+; CHECK-CVT-SD-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-CVT-SD-NEXT: sshll v2.4s, v1.4h, #0
+; CHECK-CVT-SD-NEXT: sshll v3.4s, v0.4h, #0
+; CHECK-CVT-SD-NEXT: sshll2 v4.4s, v1.8h, #0
+; CHECK-CVT-SD-NEXT: sshll2 v5.4s, v0.8h, #0
+; CHECK-CVT-SD-NEXT: scvtf v2.4s, v2.4s
+; CHECK-CVT-SD-NEXT: scvtf v3.4s, v3.4s
+; CHECK-CVT-SD-NEXT: fcvtn v1.4h, v2.4s
+; CHECK-CVT-SD-NEXT: scvtf v2.4s, v4.4s
+; CHECK-CVT-SD-NEXT: fcvtn v0.4h, v3.4s
+; CHECK-CVT-SD-NEXT: scvtf v3.4s, v5.4s
+; CHECK-CVT-SD-NEXT: fcvtn2 v1.8h, v2.4s
+; CHECK-CVT-SD-NEXT: fcvtn2 v0.8h, v3.4s
+; CHECK-CVT-SD-NEXT: ret
+;
+; CHECK-FP16-SD-LABEL: sitofp_v16i8:
+; CHECK-FP16-SD: // %bb.0:
+; CHECK-FP16-SD-NEXT: sshll2 v1.8h, v0.16b, #0
+; CHECK-FP16-SD-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-FP16-SD-NEXT: scvtf v1.8h, v1.8h
+; CHECK-FP16-SD-NEXT: scvtf v0.8h, v0.8h
+; CHECK-FP16-SD-NEXT: ret
+;
+; CHECK-CVT-GI-LABEL: sitofp_v16i8:
+; CHECK-CVT-GI: // %bb.0:
+; CHECK-CVT-GI-NEXT: sshll v1.8h, v0.8b, #0
+; CHECK-CVT-GI-NEXT: sshll2 v0.8h, v0.16b, #0
+; CHECK-CVT-GI-NEXT: sshll v2.4s, v1.4h, #0
+; CHECK-CVT-GI-NEXT: sshll v3.4s, v0.4h, #0
+; CHECK-CVT-GI-NEXT: sshll2 v1.4s, v1.8h, #0
+; CHECK-CVT-GI-NEXT: sshll2 v0.4s, v0.8h, #0
+; CHECK-CVT-GI-NEXT: scvtf v2.4s, v2.4s
+; CHECK-CVT-GI-NEXT: scvtf v3.4s, v3.4s
+; CHECK-CVT-GI-NEXT: scvtf v4.4s, v1.4s
+; CHECK-CVT-GI-NEXT: scvtf v5.4s, v0.4s
+; CHECK-CVT-GI-NEXT: fcvtn v0.4h, v2.4s
+; CHECK-CVT-GI-NEXT: fcvtn v1.4h, v3.4s
+; CHECK-CVT-GI-NEXT: fcvtn2 v0.8h, v4.4s
+; CHECK-CVT-GI-NEXT: fcvtn2 v1.8h, v5.4s
+; CHECK-CVT-GI-NEXT: ret
+;
+; CHECK-FP16-GI-LABEL: sitofp_v16i8:
+; CHECK-FP16-GI: // %bb.0:
+; CHECK-FP16-GI-NEXT: sshll v1.8h, v0.8b, #0
+; CHECK-FP16-GI-NEXT: sshll2 v2.8h, v0.16b, #0
+; CHECK-FP16-GI-NEXT: scvtf v0.8h, v1.8h
+; CHECK-FP16-GI-NEXT: scvtf v1.8h, v2.8h
+; CHECK-FP16-GI-NEXT: ret
%1 = sitofp <16 x i8> %a to <16 x half>
ret <16 x half> %1
}
define <8 x half> @sitofp_i16(<8 x i16> %a) #0 {
-; CHECK-CVT-LABEL: sitofp_i16:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: sshll v1.4s, v0.4h, #0
-; CHECK-CVT-NEXT: sshll2 v2.4s, v0.8h, #0
-; CHECK-CVT-NEXT: scvtf v1.4s, v1.4s
-; CHECK-CVT-NEXT: fcvtn v0.4h, v1.4s
-; CHECK-CVT-NEXT: scvtf v1.4s, v2.4s
-; CHECK-CVT-NEXT: fcvtn2 v0.8h, v1.4s
-; CHECK-CVT-NEXT: ret
+; CHECK-CVT-SD-LABEL: sitofp_i16:
+; CHECK-CVT-SD: // %bb.0:
+; CHECK-CVT-SD-NEXT: sshll v1.4s, v0.4h, #0
+; CHECK-CVT-SD-NEXT: sshll2 v2.4s, v0.8h, #0
+; CHECK-CVT-SD-NEXT: scvtf v1.4s, v1.4s
+; CHECK-CVT-SD-NEXT: fcvtn v0.4h, v1.4s
+; CHECK-CVT-SD-NEXT: scvtf v1.4s, v2.4s
+; CHECK-CVT-SD-NEXT: fcvtn2 v0.8h, v1.4s
+; CHECK-CVT-SD-NEXT: ret
;
; CHECK-FP16-LABEL: sitofp_i16:
; CHECK-FP16: // %bb.0:
; CHECK-FP16-NEXT: scvtf v0.8h, v0.8h
; CHECK-FP16-NEXT: ret
+;
+; CHECK-CVT-GI-LABEL: sitofp_i16:
+; CHECK-CVT-GI: // %bb.0:
+; CHECK-CVT-GI-NEXT: sshll v1.4s, v0.4h, #0
+; CHECK-CVT-GI-NEXT: sshll2 v0.4s, v0.8h, #0
+; CHECK-CVT-GI-NEXT: scvtf v1.4s, v1.4s
+; CHECK-CVT-GI-NEXT: scvtf v2.4s, v0.4s
+; CHECK-CVT-GI-NEXT: fcvtn v0.4h, v1.4s
+; CHECK-CVT-GI-NEXT: fcvtn2 v0.8h, v2.4s
+; CHECK-CVT-GI-NEXT: ret
%1 = sitofp <8 x i16> %a to <8 x half>
ret <8 x half> %1
}
@@ -286,108 +524,213 @@ define <8 x half> @sitofp_i32(<8 x i32> %a) #0 {
define <8 x half> @sitofp_i64(<8 x i64> %a) #0 {
-; CHECK-LABEL: sitofp_i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: scvtf v0.2d, v0.2d
-; CHECK-NEXT: scvtf v2.2d, v2.2d
-; CHECK-NEXT: scvtf v1.2d, v1.2d
-; CHECK-NEXT: scvtf v3.2d, v3.2d
-; CHECK-NEXT: fcvtn v0.2s, v0.2d
-; CHECK-NEXT: fcvtn v2.2s, v2.2d
-; CHECK-NEXT: fcvtn2 v0.4s, v1.2d
-; CHECK-NEXT: fcvtn2 v2.4s, v3.2d
-; CHECK-NEXT: fcvtn v0.4h, v0.4s
-; CHECK-NEXT: fcvtn2 v0.8h, v2.4s
-; CHECK-NEXT: ret
+; CHECK-CVT-SD-LABEL: sitofp_i64:
+; CHECK-CVT-SD: // %bb.0:
+; CHECK-CVT-SD-NEXT: scvtf v0.2d, v0.2d
+; CHECK-CVT-SD-NEXT: scvtf v2.2d, v2.2d
+; CHECK-CVT-SD-NEXT: scvtf v1.2d, v1.2d
+; CHECK-CVT-SD-NEXT: scvtf v3.2d, v3.2d
+; CHECK-CVT-SD-NEXT: fcvtn v0.2s, v0.2d
+; CHECK-CVT-SD-NEXT: fcvtn v2.2s, v2.2d
+; CHECK-CVT-SD-NEXT: fcvtn2 v0.4s, v1.2d
+; CHECK-CVT-SD-NEXT: fcvtn2 v2.4s, v3.2d
+; CHECK-CVT-SD-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-CVT-SD-NEXT: fcvtn2 v0.8h, v2.4s
+; CHECK-CVT-SD-NEXT: ret
+;
+; CHECK-FP16-SD-LABEL: sitofp_i64:
+; CHECK-FP16-SD: // %bb.0:
+; CHECK-FP16-SD-NEXT: scvtf v0.2d, v0.2d
+; CHECK-FP16-SD-NEXT: scvtf v2.2d, v2.2d
+; CHECK-FP16-SD-NEXT: scvtf v1.2d, v1.2d
+; CHECK-FP16-SD-NEXT: scvtf v3.2d, v3.2d
+; CHECK-FP16-SD-NEXT: fcvtn v0.2s, v0.2d
+; CHECK-FP16-SD-NEXT: fcvtn v2.2s, v2.2d
+; CHECK-FP16-SD-NEXT: fcvtn2 v0.4s, v1.2d
+; CHECK-FP16-SD-NEXT: fcvtn2 v2.4s, v3.2d
+; CHECK-FP16-SD-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-FP16-SD-NEXT: fcvtn2 v0.8h, v2.4s
+; CHECK-FP16-SD-NEXT: ret
+;
+; CHECK-CVT-GI-LABEL: sitofp_i64:
+; CHECK-CVT-GI: // %bb.0:
+; CHECK-CVT-GI-NEXT: scvtf v0.2d, v0.2d
+; CHECK-CVT-GI-NEXT: scvtf v1.2d, v1.2d
+; CHECK-CVT-GI-NEXT: scvtf v2.2d, v2.2d
+; CHECK-CVT-GI-NEXT: scvtf v3.2d, v3.2d
+; CHECK-CVT-GI-NEXT: fcvtn v0.2s, v0.2d
+; CHECK-CVT-GI-NEXT: fcvtn v2.2s, v2.2d
+; CHECK-CVT-GI-NEXT: fcvtn2 v0.4s, v1.2d
+; CHECK-CVT-GI-NEXT: fcvtn2 v2.4s, v3.2d
+; CHECK-CVT-GI-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-CVT-GI-NEXT: fcvtn2 v0.8h, v2.4s
+; CHECK-CVT-GI-NEXT: ret
+;
+; CHECK-FP16-GI-LABEL: sitofp_i64:
+; CHECK-FP16-GI: // %bb.0:
+; CHECK-FP16-GI-NEXT: scvtf v0.2d, v0.2d
+; CHECK-FP16-GI-NEXT: scvtf v1.2d, v1.2d
+; CHECK-FP16-GI-NEXT: scvtf v2.2d, v2.2d
+; CHECK-FP16-GI-NEXT: scvtf v3.2d, v3.2d
+; CHECK-FP16-GI-NEXT: fcvtn v0.2s, v0.2d
+; CHECK-FP16-GI-NEXT: fcvtn v2.2s, v2.2d
+; CHECK-FP16-GI-NEXT: fcvtn2 v0.4s, v1.2d
+; CHECK-FP16-GI-NEXT: fcvtn2 v2.4s, v3.2d
+; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-FP16-GI-NEXT: fcvtn2 v0.8h, v2.4s
+; CHECK-FP16-GI-NEXT: ret
%1 = sitofp <8 x i64> %a to <8 x half>
ret <8 x half> %1
}
define <4 x half> @uitofp_v4i8(<4 x i8> %a) #0 {
-; CHECK-CVT-LABEL: uitofp_v4i8:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: bic v0.4h, #255, lsl #8
-; CHECK-CVT-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-CVT-NEXT: ucvtf v0.4s, v0.4s
-; CHECK-CVT-NEXT: fcvtn v0.4h, v0.4s
-; CHECK-CVT-NEXT: ret
-;
-; CHECK-FP16-LABEL: uitofp_v4i8:
-; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: bic v0.4h, #255, lsl #8
-; CHECK-FP16-NEXT: ucvtf v0.4h, v0.4h
-; CHECK-FP16-NEXT: ret
+; CHECK-CVT-SD-LABEL: uitofp_v4i8:
+; CHECK-CVT-SD: // %bb.0:
+; CHECK-CVT-SD-NEXT: bic v0.4h, #255, lsl #8
+; CHECK-CVT-SD-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-CVT-SD-NEXT: ucvtf v0.4s, v0.4s
+; CHECK-CVT-SD-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-CVT-SD-NEXT: ret
+;
+; CHECK-FP16-SD-LABEL: uitofp_v4i8:
+; CHECK-FP16-SD: // %bb.0:
+; CHECK-FP16-SD-NEXT: bic v0.4h, #255, lsl #8
+; CHECK-FP16-SD-NEXT: ucvtf v0.4h, v0.4h
+; CHECK-FP16-SD-NEXT: ret
+;
+; CHECK-CVT-GI-LABEL: uitofp_v4i8:
+; CHECK-CVT-GI: // %bb.0:
+; CHECK-CVT-GI-NEXT: movi v1.2d, #0x0000ff000000ff
+; CHECK-CVT-GI-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-CVT-GI-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-CVT-GI-NEXT: ucvtf v0.4s, v0.4s
+; CHECK-CVT-GI-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-CVT-GI-NEXT: ret
+;
+; CHECK-FP16-GI-LABEL: uitofp_v4i8:
+; CHECK-FP16-GI: // %bb.0:
+; CHECK-FP16-GI-NEXT: movi d1, #0xff00ff00ff00ff
+; CHECK-FP16-GI-NEXT: and v0.8b, v0.8b, v1.8b
+; CHECK-FP16-GI-NEXT: ucvtf v0.4h, v0.4h
+; CHECK-FP16-GI-NEXT: ret
%1 = uitofp <4 x i8> %a to <4 x half>
ret <4 x half> %1
}
define <8 x half> @uitofp_v8i8(<8 x i8> %a) #0 {
-; CHECK-CVT-LABEL: uitofp_v8i8:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-CVT-NEXT: ushll v1.4s, v0.4h, #0
-; CHECK-CVT-NEXT: ushll2 v2.4s, v0.8h, #0
-; CHECK-CVT-NEXT: ucvtf v1.4s, v1.4s
-; CHECK-CVT-NEXT: fcvtn v0.4h, v1.4s
-; CHECK-CVT-NEXT: ucvtf v1.4s, v2.4s
-; CHECK-CVT-NEXT: fcvtn2 v0.8h, v1.4s
-; CHECK-CVT-NEXT: ret
+; CHECK-CVT-SD-LABEL: uitofp_v8i8:
+; CHECK-CVT-SD: // %bb.0:
+; CHECK-CVT-SD-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-CVT-SD-NEXT: ushll v1.4s, v0.4h, #0
+; CHECK-CVT-SD-NEXT: ushll2 v2.4s, v0.8h, #0
+; CHECK-CVT-SD-NEXT: ucvtf v1.4s, v1.4s
+; CHECK-CVT-SD-NEXT: fcvtn v0.4h, v1.4s
+; CHECK-CVT-SD-NEXT: ucvtf v1.4s, v2.4s
+; CHECK-CVT-SD-NEXT: fcvtn2 v0.8h, v1.4s
+; CHECK-CVT-SD-NEXT: ret
;
; CHECK-FP16-LABEL: uitofp_v8i8:
; CHECK-FP16: // %bb.0:
; CHECK-FP16-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-FP16-NEXT: ucvtf v0.8h, v0.8h
; CHECK-FP16-NEXT: ret
+;
+; CHECK-CVT-GI-LABEL: uitofp_v8i8:
+; CHECK-CVT-GI: // %bb.0:
+; CHECK-CVT-GI-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-CVT-GI-NEXT: ushll v1.4s, v0.4h, #0
+; CHECK-CVT-GI-NEXT: ushll2 v0.4s, v0.8h, #0
+; CHECK-CVT-GI-NEXT: ucvtf v1.4s, v1.4s
+; CHECK-CVT-GI-NEXT: ucvtf v2.4s, v0.4s
+; CHECK-CVT-GI-NEXT: fcvtn v0.4h, v1.4s
+; CHECK-CVT-GI-NEXT: fcvtn2 v0.8h, v2.4s
+; CHECK-CVT-GI-NEXT: ret
%1 = uitofp <8 x i8> %a to <8 x half>
ret <8 x half> %1
}
define <16 x half> @uitofp_v16i8(<16 x i8> %a) #0 {
-; CHECK-CVT-LABEL: uitofp_v16i8:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: ushll2 v1.8h, v0.16b, #0
-; CHECK-CVT-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-CVT-NEXT: ushll v2.4s, v1.4h, #0
-; CHECK-CVT-NEXT: ushll v3.4s, v0.4h, #0
-; CHECK-CVT-NEXT: ushll2 v4.4s, v1.8h, #0
-; CHECK-CVT-NEXT: ushll2 v5.4s, v0.8h, #0
-; CHECK-CVT-NEXT: ucvtf v2.4s, v2.4s
-; CHECK-CVT-NEXT: ucvtf v3.4s, v3.4s
-; CHECK-CVT-NEXT: fcvtn v1.4h, v2.4s
-; CHECK-CVT-NEXT: ucvtf v2.4s, v4.4s
-; CHECK-CVT-NEXT: fcvtn v0.4h, v3.4s
-; CHECK-CVT-NEXT: ucvtf v3.4s, v5.4s
-; CHECK-CVT-NEXT: fcvtn2 v1.8h, v2.4s
-; CHECK-CVT-NEXT: fcvtn2 v0.8h, v3.4s
-; CHECK-CVT-NEXT: ret
-;
-; CHECK-FP16-LABEL: uitofp_v16i8:
-; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: ushll2 v1.8h, v0.16b, #0
-; CHECK-FP16-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-FP16-NEXT: ucvtf v1.8h, v1.8h
-; CHECK-FP16-NEXT: ucvtf v0.8h, v0.8h
-; CHECK-FP16-NEXT: ret
+; CHECK-CVT-SD-LABEL: uitofp_v16i8:
+; CHECK-CVT-SD: // %bb.0:
+; CHECK-CVT-SD-NEXT: ushll2 v1.8h, v0.16b, #0
+; CHECK-CVT-SD-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-CVT-SD-NEXT: ushll v2.4s, v1.4h, #0
+; CHECK-CVT-SD-NEXT: ushll v3.4s, v0.4h, #0
+; CHECK-CVT-SD-NEXT: ushll2 v4.4s, v1.8h, #0
+; CHECK-CVT-SD-NEXT: ushll2 v5.4s, v0.8h, #0
+; CHECK-CVT-SD-NEXT: ucvtf v2.4s, v2.4s
+; CHECK-CVT-SD-NEXT: ucvtf v3.4s, v3.4s
+; CHECK-CVT-SD-NEXT: fcvtn v1.4h, v2.4s
+; CHECK-CVT-SD-NEXT: ucvtf v2.4s, v4.4s
+; CHECK-CVT-SD-NEXT: fcvtn v0.4h, v3.4s
+; CHECK-CVT-SD-NEXT: ucvtf v3.4s, v5.4s
+; CHECK-CVT-SD-NEXT: fcvtn2 v1.8h, v2.4s
+; CHECK-CVT-SD-NEXT: fcvtn2 v0.8h, v3.4s
+; CHECK-CVT-SD-NEXT: ret
+;
+; CHECK-FP16-SD-LABEL: uitofp_v16i8:
+; CHECK-FP16-SD: // %bb.0:
+; CHECK-FP16-SD-NEXT: ushll2 v1.8h, v0.16b, #0
+; CHECK-FP16-SD-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-FP16-SD-NEXT: ucvtf v1.8h, v1.8h
+; CHECK-FP16-SD-NEXT: ucvtf v0.8h, v0.8h
+; CHECK-FP16-SD-NEXT: ret
+;
+; CHECK-CVT-GI-LABEL: uitofp_v16i8:
+; CHECK-CVT-GI: // %bb.0:
+; CHECK-CVT-GI-NEXT: ushll v1.8h, v0.8b, #0
+; CHECK-CVT-GI-NEXT: ushll2 v0.8h, v0.16b, #0
+; CHECK-CVT-GI-NEXT: ushll v2.4s, v1.4h, #0
+; CHECK-CVT-GI-NEXT: ushll v3.4s, v0.4h, #0
+; CHECK-CVT-GI-NEXT: ushll2 v1.4s, v1.8h, #0
+; CHECK-CVT-GI-NEXT: ushll2 v0.4s, v0.8h, #0
+; CHECK-CVT-GI-NEXT: ucvtf v2.4s, v2.4s
+; CHECK-CVT-GI-NEXT: ucvtf v3.4s, v3.4s
+; CHECK-CVT-GI-NEXT: ucvtf v4.4s, v1.4s
+; CHECK-CVT-GI-NEXT: ucvtf v5.4s, v0.4s
+; CHECK-CVT-GI-NEXT: fcvtn v0.4h, v2.4s
+; CHECK-CVT-GI-NEXT: fcvtn v1.4h, v3.4s
+; CHECK-CVT-GI-NEXT: fcvtn2 v0.8h, v4.4s
+; CHECK-CVT-GI-NEXT: fcvtn2 v1.8h, v5.4s
+; CHECK-CVT-GI-NEXT: ret
+;
+; CHECK-FP16-GI-LABEL: uitofp_v16i8:
+; CHECK-FP16-GI: // %bb.0:
+; CHECK-FP16-GI-NEXT: ushll v1.8h, v0.8b, #0
+; CHECK-FP16-GI-NEXT: ushll2 v2.8h, v0.16b, #0
+; CHECK-FP16-GI-NEXT: ucvtf v0.8h, v1.8h
+; CHECK-FP16-GI-NEXT: ucvtf v1.8h, v2.8h
+; CHECK-FP16-GI-NEXT: ret
%1 = uitofp <16 x i8> %a to <16 x half>
ret <16 x half> %1
}
define <8 x half> @uitofp_i16(<8 x i16> %a) #0 {
-; CHECK-CVT-LABEL: uitofp_i16:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: ushll v1.4s, v0.4h, #0
-; CHECK-CVT-NEXT: ushll2 v2.4s, v0.8h, #0
-; CHECK-CVT-NEXT: ucvtf v1.4s, v1.4s
-; CHECK-CVT-NEXT: fcvtn v0.4h, v1.4s
-; CHECK-CVT-NEXT: ucvtf v1.4s, v2.4s
-; CHECK-CVT-NEXT: fcvtn2 v0.8h, v1.4s
-; CHECK-CVT-NEXT: ret
+; CHECK-CVT-SD-LABEL: uitofp_i16:
+; CHECK-CVT-SD: // %bb.0:
+; CHECK-CVT-SD-NEXT: ushll v1.4s, v0.4h, #0
+; CHECK-CVT-SD-NEXT: ushll2 v2.4s, v0.8h, #0
+; CHECK-CVT-SD-NEXT: ucvtf v1.4s, v1.4s
+; CHECK-CVT-SD-NEXT: fcvtn v0.4h, v1.4s
+; CHECK-CVT-SD-NEXT: ucvtf v1.4s, v2.4s
+; CHECK-CVT-SD-NEXT: fcvtn2 v0.8h, v1.4s
+; CHECK-CVT-SD-NEXT: ret
;
; CHECK-FP16-LABEL: uitofp_i16:
; CHECK-FP16: // %bb.0:
; CHECK-FP16-NEXT: ucvtf v0.8h, v0.8h
; CHECK-FP16-NEXT: ret
+;
+; CHECK-CVT-GI-LABEL: uitofp_i16:
+; CHECK-CVT-GI: // %bb.0:
+; CHECK-CVT-GI-NEXT: ushll v1.4s, v0.4h, #0
+; CHECK-CVT-GI-NEXT: ushll2 v0.4s, v0.8h, #0
+; CHECK-CVT-GI-NEXT: ucvtf v1.4s, v1.4s
+; CHECK-CVT-GI-NEXT: ucvtf v2.4s, v0.4s
+; CHECK-CVT-GI-NEXT: fcvtn v0.4h, v1.4s
+; CHECK-CVT-GI-NEXT: fcvtn2 v0.8h, v2.4s
+; CHECK-CVT-GI-NEXT: ret
%1 = uitofp <8 x i16> %a to <8 x half>
ret <8 x half> %1
}
@@ -407,19 +750,61 @@ define <8 x half> @uitofp_i32(<8 x i32> %a) #0 {
define <8 x half> @uitofp_i64(<8 x i64> %a) #0 {
-; CHECK-LABEL: uitofp_i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ucvtf v0.2d, v0.2d
-; CHECK-NEXT: ucvtf v2.2d, v2.2d
-; CHECK-NEXT: ucvtf v1.2d, v1.2d
-; CHECK-NEXT: ucvtf v3.2d, v3.2d
-; CHECK-NEXT: fcvtn v0.2s, v0.2d
-; CHECK-NEXT: fcvtn v2.2s, v2.2d
-; CHECK-NEXT: fcvtn2 v0.4s, v1.2d
-; CHECK-NEXT: fcvtn2 v2.4s, v3.2d
-; CHECK-NEXT: fcvtn v0.4h, v0.4s
-; CHECK-NEXT: fcvtn2 v0.8h, v2.4s
-; CHECK-NEXT: ret
+; CHECK-CVT-SD-LABEL: uitofp_i64:
+; CHECK-CVT-SD: // %bb.0:
+; CHECK-CVT-SD-NEXT: ucvtf v0.2d, v0.2d
+; CHECK-CVT-SD-NEXT: ucvtf v2.2d, v2.2d
+; CHECK-CVT-SD-NEXT: ucvtf v1.2d, v1.2d
+; CHECK-CVT-SD-NEXT: ucvtf v3.2d, v3.2d
+; CHECK-CVT-SD-NEXT: fcvtn v0.2s, v0.2d
+; CHECK-CVT-SD-NEXT: fcvtn v2.2s, v2.2d
+; CHECK-CVT-SD-NEXT: fcvtn2 v0.4s, v1.2d
+; CHECK-CVT-SD-NEXT: fcvtn2 v2.4s, v3.2d
+; CHECK-CVT-SD-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-CVT-SD-NEXT: fcvtn2 v0.8h, v2.4s
+; CHECK-CVT-SD-NEXT: ret
+;
+; CHECK-FP16-SD-LABEL: uitofp_i64:
+; CHECK-FP16-SD: // %bb.0:
+; CHECK-FP16-SD-NEXT: ucvtf v0.2d, v0.2d
+; CHECK-FP16-SD-NEXT: ucvtf v2.2d, v2.2d
+; CHECK-FP16-SD-NEXT: ucvtf v1.2d, v1.2d
+; CHECK-FP16-SD-NEXT: ucvtf v3.2d, v3.2d
+; CHECK-FP16-SD-NEXT: fcvtn v0.2s, v0.2d
+; CHECK-FP16-SD-NEXT: fcvtn v2.2s, v2.2d
+; CHECK-FP16-SD-NEXT: fcvtn2 v0.4s, v1.2d
+; CHECK-FP16-SD-NEXT: fcvtn2 v2.4s, v3.2d
+; CHECK-FP16-SD-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-FP16-SD-NEXT: fcvtn2 v0.8h, v2.4s
+; CHECK-FP16-SD-NEXT: ret
+;
+; CHECK-CVT-GI-LABEL: uitofp_i64:
+; CHECK-CVT-GI: // %bb.0:
+; CHECK-CVT-GI-NEXT: ucvtf v0.2d, v0.2d
+; CHECK-CVT-GI-NEXT: ucvtf v1.2d, v1.2d
+; CHECK-CVT-GI-NEXT: ucvtf v2.2d, v2.2d
+; CHECK-CVT-GI-NEXT: ucvtf v3.2d, v3.2d
+; CHECK-CVT-GI-NEXT: fcvtn v0.2s, v0.2d
+; CHECK-CVT-GI-NEXT: fcvtn v2.2s, v2.2d
+; CHECK-CVT-GI-NEXT: fcvtn2 v0.4s, v1.2d
+; CHECK-CVT-GI-NEXT: fcvtn2 v2.4s, v3.2d
+; CHECK-CVT-GI-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-CVT-GI-NEXT: fcvtn2 v0.8h, v2.4s
+; CHECK-CVT-GI-NEXT: ret
+;
+; CHECK-FP16-GI-LABEL: uitofp_i64:
+; CHECK-FP16-GI: // %bb.0:
+; CHECK-FP16-GI-NEXT: ucvtf v0.2d, v0.2d
+; CHECK-FP16-GI-NEXT: ucvtf v1.2d, v1.2d
+; CHECK-FP16-GI-NEXT: ucvtf v2.2d, v2.2d
+; CHECK-FP16-GI-NEXT: ucvtf v3.2d, v3.2d
+; CHECK-FP16-GI-NEXT: fcvtn v0.2s, v0.2d
+; CHECK-FP16-GI-NEXT: fcvtn v2.2s, v2.2d
+; CHECK-FP16-GI-NEXT: fcvtn2 v0.4s, v1.2d
+; CHECK-FP16-GI-NEXT: fcvtn2 v2.4s, v3.2d
+; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-FP16-GI-NEXT: fcvtn2 v0.8h, v2.4s
+; CHECK-FP16-GI-NEXT: ret
%1 = uitofp <8 x i64> %a to <8 x half>
ret <8 x half> %1
}
@@ -436,94 +821,132 @@ define void @test_insert_at_zero(half %a, ptr %b) #0 {
}
define <8 x i8> @fptosi_i8(<8 x half> %a) #0 {
-; CHECK-CVT-LABEL: fptosi_i8:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h
-; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: fcvtzs v1.4s, v1.4s
-; CHECK-CVT-NEXT: fcvtzs v0.4s, v0.4s
-; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v1.8h
-; CHECK-CVT-NEXT: xtn v0.8b, v0.8h
-; CHECK-CVT-NEXT: ret
+; CHECK-CVT-SD-LABEL: fptosi_i8:
+; CHECK-CVT-SD: // %bb.0:
+; CHECK-CVT-SD-NEXT: fcvtl2 v1.4s, v0.8h
+; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-SD-NEXT: fcvtzs v1.4s, v1.4s
+; CHECK-CVT-SD-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-CVT-SD-NEXT: uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-CVT-SD-NEXT: xtn v0.8b, v0.8h
+; CHECK-CVT-SD-NEXT: ret
;
; CHECK-FP16-LABEL: fptosi_i8:
; CHECK-FP16: // %bb.0:
; CHECK-FP16-NEXT: fcvtzs v0.8h, v0.8h
; CHECK-FP16-NEXT: xtn v0.8b, v0.8h
; CHECK-FP16-NEXT: ret
+;
+; CHECK-CVT-GI-LABEL: fptosi_i8:
+; CHECK-CVT-GI: // %bb.0:
+; CHECK-CVT-GI-NEXT: fcvtl v1.4s, v0.4h
+; CHECK-CVT-GI-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-CVT-GI-NEXT: fcvtzs v1.4s, v1.4s
+; CHECK-CVT-GI-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-CVT-GI-NEXT: uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-CVT-GI-NEXT: xtn v0.8b, v0.8h
+; CHECK-CVT-GI-NEXT: ret
%1 = fptosi<8 x half> %a to <8 x i8>
ret <8 x i8> %1
}
define <8 x i16> @fptosi_i16(<8 x half> %a) #0 {
-; CHECK-CVT-LABEL: fptosi_i16:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h
-; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: fcvtzs v1.4s, v1.4s
-; CHECK-CVT-NEXT: fcvtzs v0.4s, v0.4s
-; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v1.8h
-; CHECK-CVT-NEXT: ret
+; CHECK-CVT-SD-LABEL: fptosi_i16:
+; CHECK-CVT-SD: // %bb.0:
+; CHECK-CVT-SD-NEXT: fcvtl2 v1.4s, v0.8h
+; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-SD-NEXT: fcvtzs v1.4s, v1.4s
+; CHECK-CVT-SD-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-CVT-SD-NEXT: uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-CVT-SD-NEXT: ret
;
; CHECK-FP16-LABEL: fptosi_i16:
; CHECK-FP16: // %bb.0:
; CHECK-FP16-NEXT: fcvtzs v0.8h, v0.8h
; CHECK-FP16-NEXT: ret
+;
+; CHECK-CVT-GI-LABEL: fptosi_i16:
+; CHECK-CVT-GI: // %bb.0:
+; CHECK-CVT-GI-NEXT: fcvtl v1.4s, v0.4h
+; CHECK-CVT-GI-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-CVT-GI-NEXT: fcvtzs v1.4s, v1.4s
+; CHECK-CVT-GI-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-CVT-GI-NEXT: uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-CVT-GI-NEXT: ret
%1 = fptosi<8 x half> %a to <8 x i16>
ret <8 x i16> %1
}
define <8 x i8> @fptoui_i8(<8 x half> %a) #0 {
-; CHECK-CVT-LABEL: fptoui_i8:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h
-; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: fcvtzu v1.4s, v1.4s
-; CHECK-CVT-NEXT: fcvtzu v0.4s, v0.4s
-; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v1.8h
-; CHECK-CVT-NEXT: xtn v0.8b, v0.8h
-; CHECK-CVT-NEXT: ret
+; CHECK-CVT-SD-LABEL: fptoui_i8:
+; CHECK-CVT-SD: // %bb.0:
+; CHECK-CVT-SD-NEXT: fcvtl2 v1.4s, v0.8h
+; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-SD-NEXT: fcvtzu v1.4s, v1.4s
+; CHECK-CVT-SD-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-CVT-SD-NEXT: uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-CVT-SD-NEXT: xtn v0.8b, v0.8h
+; CHECK-CVT-SD-NEXT: ret
;
; CHECK-FP16-LABEL: fptoui_i8:
; CHECK-FP16: // %bb.0:
; CHECK-FP16-NEXT: fcvtzu v0.8h, v0.8h
; CHECK-FP16-NEXT: xtn v0.8b, v0.8h
; CHECK-FP16-NEXT: ret
+;
+; CHECK-CVT-GI-LABEL: fptoui_i8:
+; CHECK-CVT-GI: // %bb.0:
+; CHECK-CVT-GI-NEXT: fcvtl v1.4s, v0.4h
+; CHECK-CVT-GI-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-CVT-GI-NEXT: fcvtzu v1.4s, v1.4s
+; CHECK-CVT-GI-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-CVT-GI-NEXT: uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-CVT-GI-NEXT: xtn v0.8b, v0.8h
+; CHECK-CVT-GI-NEXT: ret
%1 = fptoui<8 x half> %a to <8 x i8>
ret <8 x i8> %1
}
define <8 x i16> @fptoui_i16(<8 x half> %a) #0 {
-; CHECK-CVT-LABEL: fptoui_i16:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h
-; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: fcvtzu v1.4s, v1.4s
-; CHECK-CVT-NEXT: fcvtzu v0.4s, v0.4s
-; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v1.8h
-; CHECK-CVT-NEXT: ret
+; CHECK-CVT-SD-LABEL: fptoui_i16:
+; CHECK-CVT-SD: // %bb.0:
+; CHECK-CVT-SD-NEXT: fcvtl2 v1.4s, v0.8h
+; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-SD-NEXT: fcvtzu v1.4s, v1.4s
+; CHECK-CVT-SD-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-CVT-SD-NEXT: uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-CVT-SD-NEXT: ret
;
; CHECK-FP16-LABEL: fptoui_i16:
; CHECK-FP16: // %bb.0:
; CHECK-FP16-NEXT: fcvtzu v0.8h, v0.8h
; CHECK-FP16-NEXT: ret
+;
+; CHECK-CVT-GI-LABEL: fptoui_i16:
+; CHECK-CVT-GI: // %bb.0:
+; CHECK-CVT-GI-NEXT: fcvtl v1.4s, v0.4h
+; CHECK-CVT-GI-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-CVT-GI-NEXT: fcvtzu v1.4s, v1.4s
+; CHECK-CVT-GI-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-CVT-GI-NEXT: uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-CVT-GI-NEXT: ret
%1 = fptoui<8 x half> %a to <8 x i16>
ret <8 x i16> %1
}
define <8 x i1> @test_fcmp_une(<8 x half> %a, <8 x half> %b) #0 {
-; CHECK-CVT-LABEL: test_fcmp_une:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl2 v2.4s, v1.8h
-; CHECK-CVT-NEXT: fcvtl2 v3.4s, v0.8h
-; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h
-; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: fcmeq v2.4s, v3.4s, v2.4s
-; CHECK-CVT-NEXT: fcmeq v0.4s, v0.4s, v1.4s
-; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v2.8h
-; CHECK-CVT-NEXT: mvn v0.16b, v0.16b
-; CHECK-CVT-NEXT: xtn v0.8b, v0.8h
-; CHECK-CVT-NEXT: ret
+; CHECK-CVT-SD-LABEL: test_fcmp_une:
+; CHECK-CVT-SD: // %bb.0:
+; CHECK-CVT-SD-NEXT: fcvtl2 v2.4s, v1.8h
+; CHECK-CVT-SD-NEXT: fcvtl2 v3.4s, v0.8h
+; CHECK-CVT-SD-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-SD-NEXT: fcmeq v2.4s, v3.4s, v2.4s
+; CHECK-CVT-SD-NEXT: fcmeq v0.4s, v0.4s, v1.4s
+; CHECK-CVT-SD-NEXT: uzp1 v0.8h, v0.8h, v2.8h
+; CHECK-CVT-SD-NEXT: mvn v0.16b, v0.16b
+; CHECK-CVT-SD-NEXT: xtn v0.8b, v0.8h
+; CHECK-CVT-SD-NEXT: ret
;
; CHECK-FP16-LABEL: test_fcmp_une:
; CHECK-FP16: // %bb.0:
@@ -531,27 +954,41 @@ define <8 x i1> @test_fcmp_une(<8 x half> %a, <8 x half> %b) #0 {
; CHECK-FP16-NEXT: mvn v0.16b, v0.16b
; CHECK-FP16-NEXT: xtn v0.8b, v0.8h
; CHECK-FP16-NEXT: ret
+;
+; CHECK-CVT-GI-LABEL: test_fcmp_une:
+; CHECK-CVT-GI: // %bb.0:
+; CHECK-CVT-GI-NEXT: fcvtl v2.4s, v0.4h
+; CHECK-CVT-GI-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-CVT-GI-NEXT: fcvtl v3.4s, v1.4h
+; CHECK-CVT-GI-NEXT: fcvtl2 v1.4s, v1.8h
+; CHECK-CVT-GI-NEXT: fcmeq v2.4s, v2.4s, v3.4s
+; CHECK-CVT-GI-NEXT: fcmeq v0.4s, v0.4s, v1.4s
+; CHECK-CVT-GI-NEXT: mvn v1.16b, v2.16b
+; CHECK-CVT-GI-NEXT: mvn v0.16b, v0.16b
+; CHECK-CVT-GI-NEXT: uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-CVT-GI-NEXT: xtn v0.8b, v0.8h
+; CHECK-CVT-GI-NEXT: ret
%1 = fcmp une <8 x half> %a, %b
ret <8 x i1> %1
}
define <8 x i1> @test_fcmp_ueq(<8 x half> %a, <8 x half> %b) #0 {
-; CHECK-CVT-LABEL: test_fcmp_ueq:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl2 v2.4s, v1.8h
-; CHECK-CVT-NEXT: fcvtl2 v3.4s, v0.8h
-; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h
-; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: fcmgt v4.4s, v3.4s, v2.4s
-; CHECK-CVT-NEXT: fcmgt v2.4s, v2.4s, v3.4s
-; CHECK-CVT-NEXT: fcmgt v3.4s, v0.4s, v1.4s
-; CHECK-CVT-NEXT: fcmgt v0.4s, v1.4s, v0.4s
-; CHECK-CVT-NEXT: orr v1.16b, v2.16b, v4.16b
-; CHECK-CVT-NEXT: orr v0.16b, v0.16b, v3.16b
-; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v1.8h
-; CHECK-CVT-NEXT: mvn v0.16b, v0.16b
-; CHECK-CVT-NEXT: xtn v0.8b, v0.8h
-; CHECK-CVT-NEXT: ret
+; CHECK-CVT-SD-LABEL: test_fcmp_ueq:
+; CHECK-CVT-SD: // %bb.0:
+; CHECK-CVT-SD-NEXT: fcvtl2 v2.4s, v1.8h
+; CHECK-CVT-SD-NEXT: fcvtl2 v3.4s, v0.8h
+; CHECK-CVT-SD-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-SD-NEXT: fcmgt v4.4s, v3.4s, v2.4s
+; CHECK-CVT-SD-NEXT: fcmgt v2.4s, v2.4s, v3.4s
+; CHECK-CVT-SD-NEXT: fcmgt v3.4s, v0.4s, v1.4s
+; CHECK-CVT-SD-NEXT: fcmgt v0.4s, v1.4s, v0.4s
+; CHECK-CVT-SD-NEXT: orr v1.16b, v2.16b, v4.16b
+; CHECK-CVT-SD-NEXT: orr v0.16b, v0.16b, v3.16b
+; CHECK-CVT-SD-NEXT: uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-CVT-SD-NEXT: mvn v0.16b, v0.16b
+; CHECK-CVT-SD-NEXT: xtn v0.8b, v0.8h
+; CHECK-CVT-SD-NEXT: ret
;
; CHECK-FP16-LABEL: test_fcmp_ueq:
; CHECK-FP16: // %bb.0:
@@ -561,23 +998,41 @@ define <8 x i1> @test_fcmp_ueq(<8 x half> %a, <8 x half> %b) #0 {
; CHECK-FP16-NEXT: mvn v0.16b, v0.16b
; CHECK-FP16-NEXT: xtn v0.8b, v0.8h
; CHECK-FP16-NEXT: ret
+;
+; CHECK-CVT-GI-LABEL: test_fcmp_ueq:
+; CHECK-CVT-GI: // %bb.0:
+; CHECK-CVT-GI-NEXT: fcvtl v2.4s, v0.4h
+; CHECK-CVT-GI-NEXT: fcvtl v3.4s, v1.4h
+; CHECK-CVT-GI-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-CVT-GI-NEXT: fcvtl2 v1.4s, v1.8h
+; CHECK-CVT-GI-NEXT: fcmgt v4.4s, v2.4s, v3.4s
+; CHECK-CVT-GI-NEXT: fcmgt v2.4s, v3.4s, v2.4s
+; CHECK-CVT-GI-NEXT: fcmgt v3.4s, v0.4s, v1.4s
+; CHECK-CVT-GI-NEXT: fcmgt v0.4s, v1.4s, v0.4s
+; CHECK-CVT-GI-NEXT: orr v1.16b, v2.16b, v4.16b
+; CHECK-CVT-GI-NEXT: orr v0.16b, v0.16b, v3.16b
+; CHECK-CVT-GI-NEXT: mvn v1.16b, v1.16b
+; CHECK-CVT-GI-NEXT: mvn v0.16b, v0.16b
+; CHECK-CVT-GI-NEXT: uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-CVT-GI-NEXT: xtn v0.8b, v0.8h
+; CHECK-CVT-GI-NEXT: ret
%1 = fcmp ueq <8 x half> %a, %b
ret <8 x i1> %1
}
define <8 x i1> @test_fcmp_ugt(<8 x half> %a, <8 x half> %b) #0 {
-; CHECK-CVT-LABEL: test_fcmp_ugt:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl2 v2.4s, v0.8h
-; CHECK-CVT-NEXT: fcvtl2 v3.4s, v1.8h
-; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h
-; CHECK-CVT-NEXT: fcmge v2.4s, v3.4s, v2.4s
-; CHECK-CVT-NEXT: fcmge v0.4s, v1.4s, v0.4s
-; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v2.8h
-; CHECK-CVT-NEXT: mvn v0.16b, v0.16b
-; CHECK-CVT-NEXT: xtn v0.8b, v0.8h
-; CHECK-CVT-NEXT: ret
+; CHECK-CVT-SD-LABEL: test_fcmp_ugt:
+; CHECK-CVT-SD: // %bb.0:
+; CHECK-CVT-SD-NEXT: fcvtl2 v2.4s, v0.8h
+; CHECK-CVT-SD-NEXT: fcvtl2 v3.4s, v1.8h
+; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-SD-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-CVT-SD-NEXT: fcmge v2.4s, v3.4s, v2.4s
+; CHECK-CVT-SD-NEXT: fcmge v0.4s, v1.4s, v0.4s
+; CHECK-CVT-SD-NEXT: uzp1 v0.8h, v0.8h, v2.8h
+; CHECK-CVT-SD-NEXT: mvn v0.16b, v0.16b
+; CHECK-CVT-SD-NEXT: xtn v0.8b, v0.8h
+; CHECK-CVT-SD-NEXT: ret
;
; CHECK-FP16-LABEL: test_fcmp_ugt:
; CHECK-FP16: // %bb.0:
@@ -585,23 +1040,37 @@ define <8 x i1> @test_fcmp_ugt(<8 x half> %a, <8 x half> %b) #0 {
; CHECK-FP16-NEXT: mvn v0.16b, v0.16b
; CHECK-FP16-NEXT: xtn v0.8b, v0.8h
; CHECK-FP16-NEXT: ret
+;
+; CHECK-CVT-GI-LABEL: test_fcmp_ugt:
+; CHECK-CVT-GI: // %bb.0:
+; CHECK-CVT-GI-NEXT: fcvtl v2.4s, v0.4h
+; CHECK-CVT-GI-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-CVT-GI-NEXT: fcvtl v3.4s, v1.4h
+; CHECK-CVT-GI-NEXT: fcvtl2 v1.4s, v1.8h
+; CHECK-CVT-GI-NEXT: fcmge v2.4s, v3.4s, v2.4s
+; CHECK-CVT-GI-NEXT: fcmge v0.4s, v1.4s, v0.4s
+; CHECK-CVT-GI-NEXT: mvn v1.16b, v2.16b
+; CHECK-CVT-GI-NEXT: mvn v0.16b, v0.16b
+; CHECK-CVT-GI-NEXT: uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-CVT-GI-NEXT: xtn v0.8b, v0.8h
+; CHECK-CVT-GI-NEXT: ret
%1 = fcmp ugt <8 x half> %a, %b
ret <8 x i1> %1
}
define <8 x i1> @test_fcmp_uge(<8 x half> %a, <8 x half> %b) #0 {
-; CHECK-CVT-LABEL: test_fcmp_uge:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl2 v2.4s, v0.8h
-; CHECK-CVT-NEXT: fcvtl2 v3.4s, v1.8h
-; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h
-; CHECK-CVT-NEXT: fcmgt v2.4s, v3.4s, v2.4s
-; CHECK-CVT-NEXT: fcmgt v0.4s, v1.4s, v0.4s
-; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v2.8h
-; CHECK-CVT-NEXT: mvn v0.16b, v0.16b
-; CHECK-CVT-NEXT: xtn v0.8b, v0.8h
-; CHECK-CVT-NEXT: ret
+; CHECK-CVT-SD-LABEL: test_fcmp_uge:
+; CHECK-CVT-SD: // %bb.0:
+; CHECK-CVT-SD-NEXT: fcvtl2 v2.4s, v0.8h
+; CHECK-CVT-SD-NEXT: fcvtl2 v3.4s, v1.8h
+; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-SD-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-CVT-SD-NEXT: fcmgt v2.4s, v3.4s, v2.4s
+; CHECK-CVT-SD-NEXT: fcmgt v0.4s, v1.4s, v0.4s
+; CHECK-CVT-SD-NEXT: uzp1 v0.8h, v0.8h, v2.8h
+; CHECK-CVT-SD-NEXT: mvn v0.16b, v0.16b
+; CHECK-CVT-SD-NEXT: xtn v0.8b, v0.8h
+; CHECK-CVT-SD-NEXT: ret
;
; CHECK-FP16-LABEL: test_fcmp_uge:
; CHECK-FP16: // %bb.0:
@@ -609,23 +1078,37 @@ define <8 x i1> @test_fcmp_uge(<8 x half> %a, <8 x half> %b) #0 {
; CHECK-FP16-NEXT: mvn v0.16b, v0.16b
; CHECK-FP16-NEXT: xtn v0.8b, v0.8h
; CHECK-FP16-NEXT: ret
+;
+; CHECK-CVT-GI-LABEL: test_fcmp_uge:
+; CHECK-CVT-GI: // %bb.0:
+; CHECK-CVT-GI-NEXT: fcvtl v2.4s, v0.4h
+; CHECK-CVT-GI-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-CVT-GI-NEXT: fcvtl v3.4s, v1.4h
+; CHECK-CVT-GI-NEXT: fcvtl2 v1.4s, v1.8h
+; CHECK-CVT-GI-NEXT: fcmgt v2.4s, v3.4s, v2.4s
+; CHECK-CVT-GI-NEXT: fcmgt v0.4s, v1.4s, v0.4s
+; CHECK-CVT-GI-NEXT: mvn v1.16b, v2.16b
+; CHECK-CVT-GI-NEXT: mvn v0.16b, v0.16b
+; CHECK-CVT-GI-NEXT: uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-CVT-GI-NEXT: xtn v0.8b, v0.8h
+; CHECK-CVT-GI-NEXT: ret
%1 = fcmp uge <8 x half> %a, %b
ret <8 x i1> %1
}
define <8 x i1> @test_fcmp_ult(<8 x half> %a, <8 x half> %b) #0 {
-; CHECK-CVT-LABEL: test_fcmp_ult:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl2 v2.4s, v1.8h
-; CHECK-CVT-NEXT: fcvtl2 v3.4s, v0.8h
-; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h
-; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: fcmge v2.4s, v3.4s, v2.4s
-; CHECK-CVT-NEXT: fcmge v0.4s, v0.4s, v1.4s
-; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v2.8h
-; CHECK-CVT-NEXT: mvn v0.16b, v0.16b
-; CHECK-CVT-NEXT: xtn v0.8b, v0.8h
-; CHECK-CVT-NEXT: ret
+; CHECK-CVT-SD-LABEL: test_fcmp_ult:
+; CHECK-CVT-SD: // %bb.0:
+; CHECK-CVT-SD-NEXT: fcvtl2 v2.4s, v1.8h
+; CHECK-CVT-SD-NEXT: fcvtl2 v3.4s, v0.8h
+; CHECK-CVT-SD-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-SD-NEXT: fcmge v2.4s, v3.4s, v2.4s
+; CHECK-CVT-SD-NEXT: fcmge v0.4s, v0.4s, v1.4s
+; CHECK-CVT-SD-NEXT: uzp1 v0.8h, v0.8h, v2.8h
+; CHECK-CVT-SD-NEXT: mvn v0.16b, v0.16b
+; CHECK-CVT-SD-NEXT: xtn v0.8b, v0.8h
+; CHECK-CVT-SD-NEXT: ret
;
; CHECK-FP16-LABEL: test_fcmp_ult:
; CHECK-FP16: // %bb.0:
@@ -633,23 +1116,37 @@ define <8 x i1> @test_fcmp_ult(<8 x half> %a, <8 x half> %b) #0 {
; CHECK-FP16-NEXT: mvn v0.16b, v0.16b
; CHECK-FP16-NEXT: xtn v0.8b, v0.8h
; CHECK-FP16-NEXT: ret
+;
+; CHECK-CVT-GI-LABEL: test_fcmp_ult:
+; CHECK-CVT-GI: // %bb.0:
+; CHECK-CVT-GI-NEXT: fcvtl v2.4s, v0.4h
+; CHECK-CVT-GI-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-CVT-GI-NEXT: fcvtl v3.4s, v1.4h
+; CHECK-CVT-GI-NEXT: fcvtl2 v1.4s, v1.8h
+; CHECK-CVT-GI-NEXT: fcmge v2.4s, v2.4s, v3.4s
+; CHECK-CVT-GI-NEXT: fcmge v0.4s, v0.4s, v1.4s
+; CHECK-CVT-GI-NEXT: mvn v1.16b, v2.16b
+; CHECK-CVT-GI-NEXT: mvn v0.16b, v0.16b
+; CHECK-CVT-GI-NEXT: uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-CVT-GI-NEXT: xtn v0.8b, v0.8h
+; CHECK-CVT-GI-NEXT: ret
%1 = fcmp ult <8 x half> %a, %b
ret <8 x i1> %1
}
define <8 x i1> @test_fcmp_ule(<8 x half> %a, <8 x half> %b) #0 {
-; CHECK-CVT-LABEL: test_fcmp_ule:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl2 v2.4s, v1.8h
-; CHECK-CVT-NEXT: fcvtl2 v3.4s, v0.8h
-; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h
-; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: fcmgt v2.4s, v3.4s, v2.4s
-; CHECK-CVT-NEXT: fcmgt v0.4s, v0.4s, v1.4s
-; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v2.8h
-; CHECK-CVT-NEXT: mvn v0.16b, v0.16b
-; CHECK-CVT-NEXT: xtn v0.8b, v0.8h
-; CHECK-CVT-NEXT: ret
+; CHECK-CVT-SD-LABEL: test_fcmp_ule:
+; CHECK-CVT-SD: // %bb.0:
+; CHECK-CVT-SD-NEXT: fcvtl2 v2.4s, v1.8h
+; CHECK-CVT-SD-NEXT: fcvtl2 v3.4s, v0.8h
+; CHECK-CVT-SD-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-SD-NEXT: fcmgt v2.4s, v3.4s, v2.4s
+; CHECK-CVT-SD-NEXT: fcmgt v0.4s, v0.4s, v1.4s
+; CHECK-CVT-SD-NEXT: uzp1 v0.8h, v0.8h, v2.8h
+; CHECK-CVT-SD-NEXT: mvn v0.16b, v0.16b
+; CHECK-CVT-SD-NEXT: xtn v0.8b, v0.8h
+; CHECK-CVT-SD-NEXT: ret
;
; CHECK-FP16-LABEL: test_fcmp_ule:
; CHECK-FP16: // %bb.0:
@@ -657,27 +1154,41 @@ define <8 x i1> @test_fcmp_ule(<8 x half> %a, <8 x half> %b) #0 {
; CHECK-FP16-NEXT: mvn v0.16b, v0.16b
; CHECK-FP16-NEXT: xtn v0.8b, v0.8h
; CHECK-FP16-NEXT: ret
+;
+; CHECK-CVT-GI-LABEL: test_fcmp_ule:
+; CHECK-CVT-GI: // %bb.0:
+; CHECK-CVT-GI-NEXT: fcvtl v2.4s, v0.4h
+; CHECK-CVT-GI-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-CVT-GI-NEXT: fcvtl v3.4s, v1.4h
+; CHECK-CVT-GI-NEXT: fcvtl2 v1.4s, v1.8h
+; CHECK-CVT-GI-NEXT: fcmgt v2.4s, v2.4s, v3.4s
+; CHECK-CVT-GI-NEXT: fcmgt v0.4s, v0.4s, v1.4s
+; CHECK-CVT-GI-NEXT: mvn v1.16b, v2.16b
+; CHECK-CVT-GI-NEXT: mvn v0.16b, v0.16b
+; CHECK-CVT-GI-NEXT: uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-CVT-GI-NEXT: xtn v0.8b, v0.8h
+; CHECK-CVT-GI-NEXT: ret
%1 = fcmp ule <8 x half> %a, %b
ret <8 x i1> %1
}
define <8 x i1> @test_fcmp_uno(<8 x half> %a, <8 x half> %b) #0 {
-; CHECK-CVT-LABEL: test_fcmp_uno:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl2 v2.4s, v1.8h
-; CHECK-CVT-NEXT: fcvtl2 v3.4s, v0.8h
-; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h
-; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: fcmge v4.4s, v3.4s, v2.4s
-; CHECK-CVT-NEXT: fcmgt v2.4s, v2.4s, v3.4s
-; CHECK-CVT-NEXT: fcmge v3.4s, v0.4s, v1.4s
-; CHECK-CVT-NEXT: fcmgt v0.4s, v1.4s, v0.4s
-; CHECK-CVT-NEXT: orr v1.16b, v2.16b, v4.16b
-; CHECK-CVT-NEXT: orr v0.16b, v0.16b, v3.16b
-; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v1.8h
-; CHECK-CVT-NEXT: mvn v0.16b, v0.16b
-; CHECK-CVT-NEXT: xtn v0.8b, v0.8h
-; CHECK-CVT-NEXT: ret
+; CHECK-CVT-SD-LABEL: test_fcmp_uno:
+; CHECK-CVT-SD: // %bb.0:
+; CHECK-CVT-SD-NEXT: fcvtl2 v2.4s, v1.8h
+; CHECK-CVT-SD-NEXT: fcvtl2 v3.4s, v0.8h
+; CHECK-CVT-SD-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-SD-NEXT: fcmge v4.4s, v3.4s, v2.4s
+; CHECK-CVT-SD-NEXT: fcmgt v2.4s, v2.4s, v3.4s
+; CHECK-CVT-SD-NEXT: fcmge v3.4s, v0.4s, v1.4s
+; CHECK-CVT-SD-NEXT: fcmgt v0.4s, v1.4s, v0.4s
+; CHECK-CVT-SD-NEXT: orr v1.16b, v2.16b, v4.16b
+; CHECK-CVT-SD-NEXT: orr v0.16b, v0.16b, v3.16b
+; CHECK-CVT-SD-NEXT: uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-CVT-SD-NEXT: mvn v0.16b, v0.16b
+; CHECK-CVT-SD-NEXT: xtn v0.8b, v0.8h
+; CHECK-CVT-SD-NEXT: ret
;
; CHECK-FP16-LABEL: test_fcmp_uno:
; CHECK-FP16: // %bb.0:
@@ -687,26 +1198,44 @@ define <8 x i1> @test_fcmp_uno(<8 x half> %a, <8 x half> %b) #0 {
; CHECK-FP16-NEXT: mvn v0.16b, v0.16b
; CHECK-FP16-NEXT: xtn v0.8b, v0.8h
; CHECK-FP16-NEXT: ret
+;
+; CHECK-CVT-GI-LABEL: test_fcmp_uno:
+; CHECK-CVT-GI: // %bb.0:
+; CHECK-CVT-GI-NEXT: fcvtl v2.4s, v0.4h
+; CHECK-CVT-GI-NEXT: fcvtl v3.4s, v1.4h
+; CHECK-CVT-GI-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-CVT-GI-NEXT: fcvtl2 v1.4s, v1.8h
+; CHECK-CVT-GI-NEXT: fcmge v4.4s, v2.4s, v3.4s
+; CHECK-CVT-GI-NEXT: fcmgt v2.4s, v3.4s, v2.4s
+; CHECK-CVT-GI-NEXT: fcmge v3.4s, v0.4s, v1.4s
+; CHECK-CVT-GI-NEXT: fcmgt v0.4s, v1.4s, v0.4s
+; CHECK-CVT-GI-NEXT: orr v1.16b, v2.16b, v4.16b
+; CHECK-CVT-GI-NEXT: orr v0.16b, v0.16b, v3.16b
+; CHECK-CVT-GI-NEXT: mvn v1.16b, v1.16b
+; CHECK-CVT-GI-NEXT: mvn v0.16b, v0.16b
+; CHECK-CVT-GI-NEXT: uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-CVT-GI-NEXT: xtn v0.8b, v0.8h
+; CHECK-CVT-GI-NEXT: ret
%1 = fcmp uno <8 x half> %a, %b
ret <8 x i1> %1
}
define <8 x i1> @test_fcmp_one(<8 x half> %a, <8 x half> %b) #0 {
-; CHECK-CVT-LABEL: test_fcmp_one:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl2 v2.4s, v1.8h
-; CHECK-CVT-NEXT: fcvtl2 v3.4s, v0.8h
-; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h
-; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: fcmgt v4.4s, v3.4s, v2.4s
-; CHECK-CVT-NEXT: fcmgt v2.4s, v2.4s, v3.4s
-; CHECK-CVT-NEXT: fcmgt v3.4s, v0.4s, v1.4s
-; CHECK-CVT-NEXT: fcmgt v0.4s, v1.4s, v0.4s
-; CHECK-CVT-NEXT: orr v1.16b, v2.16b, v4.16b
-; CHECK-CVT-NEXT: orr v0.16b, v0.16b, v3.16b
-; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v1.8h
-; CHECK-CVT-NEXT: xtn v0.8b, v0.8h
-; CHECK-CVT-NEXT: ret
+; CHECK-CVT-SD-LABEL: test_fcmp_one:
+; CHECK-CVT-SD: // %bb.0:
+; CHECK-CVT-SD-NEXT: fcvtl2 v2.4s, v1.8h
+; CHECK-CVT-SD-NEXT: fcvtl2 v3.4s, v0.8h
+; CHECK-CVT-SD-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-SD-NEXT: fcmgt v4.4s, v3.4s, v2.4s
+; CHECK-CVT-SD-NEXT: fcmgt v2.4s, v2.4s, v3.4s
+; CHECK-CVT-SD-NEXT: fcmgt v3.4s, v0.4s, v1.4s
+; CHECK-CVT-SD-NEXT: fcmgt v0.4s, v1.4s, v0.4s
+; CHECK-CVT-SD-NEXT: orr v1.16b, v2.16b, v4.16b
+; CHECK-CVT-SD-NEXT: orr v0.16b, v0.16b, v3.16b
+; CHECK-CVT-SD-NEXT: uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-CVT-SD-NEXT: xtn v0.8b, v0.8h
+; CHECK-CVT-SD-NEXT: ret
;
; CHECK-FP16-LABEL: test_fcmp_one:
; CHECK-FP16: // %bb.0:
@@ -715,136 +1244,212 @@ define <8 x i1> @test_fcmp_one(<8 x half> %a, <8 x half> %b) #0 {
; CHECK-FP16-NEXT: orr v0.16b, v0.16b, v2.16b
; CHECK-FP16-NEXT: xtn v0.8b, v0.8h
; CHECK-FP16-NEXT: ret
+;
+; CHECK-CVT-GI-LABEL: test_fcmp_one:
+; CHECK-CVT-GI: // %bb.0:
+; CHECK-CVT-GI-NEXT: fcvtl v2.4s, v0.4h
+; CHECK-CVT-GI-NEXT: fcvtl v3.4s, v1.4h
+; CHECK-CVT-GI-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-CVT-GI-NEXT: fcvtl2 v1.4s, v1.8h
+; CHECK-CVT-GI-NEXT: fcmgt v4.4s, v2.4s, v3.4s
+; CHECK-CVT-GI-NEXT: fcmgt v2.4s, v3.4s, v2.4s
+; CHECK-CVT-GI-NEXT: fcmgt v3.4s, v0.4s, v1.4s
+; CHECK-CVT-GI-NEXT: fcmgt v0.4s, v1.4s, v0.4s
+; CHECK-CVT-GI-NEXT: orr v1.16b, v2.16b, v4.16b
+; CHECK-CVT-GI-NEXT: orr v0.16b, v0.16b, v3.16b
+; CHECK-CVT-GI-NEXT: uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-CVT-GI-NEXT: xtn v0.8b, v0.8h
+; CHECK-CVT-GI-NEXT: ret
%1 = fcmp one <8 x half> %a, %b
ret <8 x i1> %1
}
define <8 x i1> @test_fcmp_oeq(<8 x half> %a, <8 x half> %b) #0 {
-; CHECK-CVT-LABEL: test_fcmp_oeq:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl2 v2.4s, v1.8h
-; CHECK-CVT-NEXT: fcvtl2 v3.4s, v0.8h
-; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h
-; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: fcmeq v2.4s, v3.4s, v2.4s
-; CHECK-CVT-NEXT: fcmeq v0.4s, v0.4s, v1.4s
-; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v2.8h
-; CHECK-CVT-NEXT: xtn v0.8b, v0.8h
-; CHECK-CVT-NEXT: ret
+; CHECK-CVT-SD-LABEL: test_fcmp_oeq:
+; CHECK-CVT-SD: // %bb.0:
+; CHECK-CVT-SD-NEXT: fcvtl2 v2.4s, v1.8h
+; CHECK-CVT-SD-NEXT: fcvtl2 v3.4s, v0.8h
+; CHECK-CVT-SD-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-SD-NEXT: fcmeq v2.4s, v3.4s, v2.4s
+; CHECK-CVT-SD-NEXT: fcmeq v0.4s, v0.4s, v1.4s
+; CHECK-CVT-SD-NEXT: uzp1 v0.8h, v0.8h, v2.8h
+; CHECK-CVT-SD-NEXT: xtn v0.8b, v0.8h
+; CHECK-CVT-SD-NEXT: ret
;
; CHECK-FP16-LABEL: test_fcmp_oeq:
; CHECK-FP16: // %bb.0:
; CHECK-FP16-NEXT: fcmeq v0.8h, v0.8h, v1.8h
; CHECK-FP16-NEXT: xtn v0.8b, v0.8h
; CHECK-FP16-NEXT: ret
+;
+; CHECK-CVT-GI-LABEL: test_fcmp_oeq:
+; CHECK-CVT-GI: // %bb.0:
+; CHECK-CVT-GI-NEXT: fcvtl v2.4s, v0.4h
+; CHECK-CVT-GI-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-CVT-GI-NEXT: fcvtl v3.4s, v1.4h
+; CHECK-CVT-GI-NEXT: fcvtl2 v1.4s, v1.8h
+; CHECK-CVT-GI-NEXT: fcmeq v2.4s, v2.4s, v3.4s
+; CHECK-CVT-GI-NEXT: fcmeq v0.4s, v0.4s, v1.4s
+; CHECK-CVT-GI-NEXT: uzp1 v0.8h, v2.8h, v0.8h
+; CHECK-CVT-GI-NEXT: xtn v0.8b, v0.8h
+; CHECK-CVT-GI-NEXT: ret
%1 = fcmp oeq <8 x half> %a, %b
ret <8 x i1> %1
}
define <8 x i1> @test_fcmp_ogt(<8 x half> %a, <8 x half> %b) #0 {
-; CHECK-CVT-LABEL: test_fcmp_ogt:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl2 v2.4s, v1.8h
-; CHECK-CVT-NEXT: fcvtl2 v3.4s, v0.8h
-; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h
-; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: fcmgt v2.4s, v3.4s, v2.4s
-; CHECK-CVT-NEXT: fcmgt v0.4s, v0.4s, v1.4s
-; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v2.8h
-; CHECK-CVT-NEXT: xtn v0.8b, v0.8h
-; CHECK-CVT-NEXT: ret
+; CHECK-CVT-SD-LABEL: test_fcmp_ogt:
+; CHECK-CVT-SD: // %bb.0:
+; CHECK-CVT-SD-NEXT: fcvtl2 v2.4s, v1.8h
+; CHECK-CVT-SD-NEXT: fcvtl2 v3.4s, v0.8h
+; CHECK-CVT-SD-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-SD-NEXT: fcmgt v2.4s, v3.4s, v2.4s
+; CHECK-CVT-SD-NEXT: fcmgt v0.4s, v0.4s, v1.4s
+; CHECK-CVT-SD-NEXT: uzp1 v0.8h, v0.8h, v2.8h
+; CHECK-CVT-SD-NEXT: xtn v0.8b, v0.8h
+; CHECK-CVT-SD-NEXT: ret
;
; CHECK-FP16-LABEL: test_fcmp_ogt:
; CHECK-FP16: // %bb.0:
; CHECK-FP16-NEXT: fcmgt v0.8h, v0.8h, v1.8h
; CHECK-FP16-NEXT: xtn v0.8b, v0.8h
; CHECK-FP16-NEXT: ret
+;
+; CHECK-CVT-GI-LABEL: test_fcmp_ogt:
+; CHECK-CVT-GI: // %bb.0:
+; CHECK-CVT-GI-NEXT: fcvtl v2.4s, v0.4h
+; CHECK-CVT-GI-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-CVT-GI-NEXT: fcvtl v3.4s, v1.4h
+; CHECK-CVT-GI-NEXT: fcvtl2 v1.4s, v1.8h
+; CHECK-CVT-GI-NEXT: fcmgt v2.4s, v2.4s, v3.4s
+; CHECK-CVT-GI-NEXT: fcmgt v0.4s, v0.4s, v1.4s
+; CHECK-CVT-GI-NEXT: uzp1 v0.8h, v2.8h, v0.8h
+; CHECK-CVT-GI-NEXT: xtn v0.8b, v0.8h
+; CHECK-CVT-GI-NEXT: ret
%1 = fcmp ogt <8 x half> %a, %b
ret <8 x i1> %1
}
define <8 x i1> @test_fcmp_oge(<8 x half> %a, <8 x half> %b) #0 {
-; CHECK-CVT-LABEL: test_fcmp_oge:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl2 v2.4s, v1.8h
-; CHECK-CVT-NEXT: fcvtl2 v3.4s, v0.8h
-; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h
-; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: fcmge v2.4s, v3.4s, v2.4s
-; CHECK-CVT-NEXT: fcmge v0.4s, v0.4s, v1.4s
-; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v2.8h
-; CHECK-CVT-NEXT: xtn v0.8b, v0.8h
-; CHECK-CVT-NEXT: ret
+; CHECK-CVT-SD-LABEL: test_fcmp_oge:
+; CHECK-CVT-SD: // %bb.0:
+; CHECK-CVT-SD-NEXT: fcvtl2 v2.4s, v1.8h
+; CHECK-CVT-SD-NEXT: fcvtl2 v3.4s, v0.8h
+; CHECK-CVT-SD-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-SD-NEXT: fcmge v2.4s, v3.4s, v2.4s
+; CHECK-CVT-SD-NEXT: fcmge v0.4s, v0.4s, v1.4s
+; CHECK-CVT-SD-NEXT: uzp1 v0.8h, v0.8h, v2.8h
+; CHECK-CVT-SD-NEXT: xtn v0.8b, v0.8h
+; CHECK-CVT-SD-NEXT: ret
;
; CHECK-FP16-LABEL: test_fcmp_oge:
; CHECK-FP16: // %bb.0:
; CHECK-FP16-NEXT: fcmge v0.8h, v0.8h, v1.8h
; CHECK-FP16-NEXT: xtn v0.8b, v0.8h
; CHECK-FP16-NEXT: ret
+;
+; CHECK-CVT-GI-LABEL: test_fcmp_oge:
+; CHECK-CVT-GI: // %bb.0:
+; CHECK-CVT-GI-NEXT: fcvtl v2.4s, v0.4h
+; CHECK-CVT-GI-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-CVT-GI-NEXT: fcvtl v3.4s, v1.4h
+; CHECK-CVT-GI-NEXT: fcvtl2 v1.4s, v1.8h
+; CHECK-CVT-GI-NEXT: fcmge v2.4s, v2.4s, v3.4s
+; CHECK-CVT-GI-NEXT: fcmge v0.4s, v0.4s, v1.4s
+; CHECK-CVT-GI-NEXT: uzp1 v0.8h, v2.8h, v0.8h
+; CHECK-CVT-GI-NEXT: xtn v0.8b, v0.8h
+; CHECK-CVT-GI-NEXT: ret
%1 = fcmp oge <8 x half> %a, %b
ret <8 x i1> %1
}
define <8 x i1> @test_fcmp_olt(<8 x half> %a, <8 x half> %b) #0 {
-; CHECK-CVT-LABEL: test_fcmp_olt:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl2 v2.4s, v0.8h
-; CHECK-CVT-NEXT: fcvtl2 v3.4s, v1.8h
-; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h
-; CHECK-CVT-NEXT: fcmgt v2.4s, v3.4s, v2.4s
-; CHECK-CVT-NEXT: fcmgt v0.4s, v1.4s, v0.4s
-; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v2.8h
-; CHECK-CVT-NEXT: xtn v0.8b, v0.8h
-; CHECK-CVT-NEXT: ret
+; CHECK-CVT-SD-LABEL: test_fcmp_olt:
+; CHECK-CVT-SD: // %bb.0:
+; CHECK-CVT-SD-NEXT: fcvtl2 v2.4s, v0.8h
+; CHECK-CVT-SD-NEXT: fcvtl2 v3.4s, v1.8h
+; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-SD-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-CVT-SD-NEXT: fcmgt v2.4s, v3.4s, v2.4s
+; CHECK-CVT-SD-NEXT: fcmgt v0.4s, v1.4s, v0.4s
+; CHECK-CVT-SD-NEXT: uzp1 v0.8h, v0.8h, v2.8h
+; CHECK-CVT-SD-NEXT: xtn v0.8b, v0.8h
+; CHECK-CVT-SD-NEXT: ret
;
; CHECK-FP16-LABEL: test_fcmp_olt:
; CHECK-FP16: // %bb.0:
; CHECK-FP16-NEXT: fcmgt v0.8h, v1.8h, v0.8h
; CHECK-FP16-NEXT: xtn v0.8b, v0.8h
; CHECK-FP16-NEXT: ret
+;
+; CHECK-CVT-GI-LABEL: test_fcmp_olt:
+; CHECK-CVT-GI: // %bb.0:
+; CHECK-CVT-GI-NEXT: fcvtl v2.4s, v0.4h
+; CHECK-CVT-GI-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-CVT-GI-NEXT: fcvtl v3.4s, v1.4h
+; CHECK-CVT-GI-NEXT: fcvtl2 v1.4s, v1.8h
+; CHECK-CVT-GI-NEXT: fcmgt v2.4s, v3.4s, v2.4s
+; CHECK-CVT-GI-NEXT: fcmgt v0.4s, v1.4s, v0.4s
+; CHECK-CVT-GI-NEXT: uzp1 v0.8h, v2.8h, v0.8h
+; CHECK-CVT-GI-NEXT: xtn v0.8b, v0.8h
+; CHECK-CVT-GI-NEXT: ret
%1 = fcmp olt <8 x half> %a, %b
ret <8 x i1> %1
}
define <8 x i1> @test_fcmp_ole(<8 x half> %a, <8 x half> %b) #0 {
-; CHECK-CVT-LABEL: test_fcmp_ole:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl2 v2.4s, v0.8h
-; CHECK-CVT-NEXT: fcvtl2 v3.4s, v1.8h
-; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h
-; CHECK-CVT-NEXT: fcmge v2.4s, v3.4s, v2.4s
-; CHECK-CVT-NEXT: fcmge v0.4s, v1.4s, v0.4s
-; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v2.8h
-; CHECK-CVT-NEXT: xtn v0.8b, v0.8h
-; CHECK-CVT-NEXT: ret
+; CHECK-CVT-SD-LABEL: test_fcmp_ole:
+; CHECK-CVT-SD: // %bb.0:
+; CHECK-CVT-SD-NEXT: fcvtl2 v2.4s, v0.8h
+; CHECK-CVT-SD-NEXT: fcvtl2 v3.4s, v1.8h
+; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-SD-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-CVT-SD-NEXT: fcmge v2.4s, v3.4s, v2.4s
+; CHECK-CVT-SD-NEXT: fcmge v0.4s, v1.4s, v0.4s
+; CHECK-CVT-SD-NEXT: uzp1 v0.8h, v0.8h, v2.8h
+; CHECK-CVT-SD-NEXT: xtn v0.8b, v0.8h
+; CHECK-CVT-SD-NEXT: ret
;
; CHECK-FP16-LABEL: test_fcmp_ole:
; CHECK-FP16: // %bb.0:
; CHECK-FP16-NEXT: fcmge v0.8h, v1.8h, v0.8h
; CHECK-FP16-NEXT: xtn v0.8b, v0.8h
; CHECK-FP16-NEXT: ret
+;
+; CHECK-CVT-GI-LABEL: test_fcmp_ole:
+; CHECK-CVT-GI: // %bb.0:
+; CHECK-CVT-GI-NEXT: fcvtl v2.4s, v0.4h
+; CHECK-CVT-GI-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-CVT-GI-NEXT: fcvtl v3.4s, v1.4h
+; CHECK-CVT-GI-NEXT: fcvtl2 v1.4s, v1.8h
+; CHECK-CVT-GI-NEXT: fcmge v2.4s, v3.4s, v2.4s
+; CHECK-CVT-GI-NEXT: fcmge v0.4s, v1.4s, v0.4s
+; CHECK-CVT-GI-NEXT: uzp1 v0.8h, v2.8h, v0.8h
+; CHECK-CVT-GI-NEXT: xtn v0.8b, v0.8h
+; CHECK-CVT-GI-NEXT: ret
%1 = fcmp ole <8 x half> %a, %b
ret <8 x i1> %1
}
define <8 x i1> @test_fcmp_ord(<8 x half> %a, <8 x half> %b) #0 {
-; CHECK-CVT-LABEL: test_fcmp_ord:
-; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: fcvtl2 v2.4s, v1.8h
-; CHECK-CVT-NEXT: fcvtl2 v3.4s, v0.8h
-; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h
-; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: fcmge v4.4s, v3.4s, v2.4s
-; CHECK-CVT-NEXT: fcmgt v2.4s, v2.4s, v3.4s
-; CHECK-CVT-NEXT: fcmge v3.4s, v0.4s, v1.4s
-; CHECK-CVT-NEXT: fcmgt v0.4s, v1.4s, v0.4s
-; CHECK-CVT-NEXT: orr v1.16b, v2.16b, v4.16b
-; CHECK-CVT-NEXT: orr v0.16b, v0.16b, v3.16b
-; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v1.8h
-; CHECK-CVT-NEXT: xtn v0.8b, v0.8h
-; CHECK-CVT-NEXT: ret
+; CHECK-CVT-SD-LABEL: test_fcmp_ord:
+; CHECK-CVT-SD: // %bb.0:
+; CHECK-CVT-SD-NEXT: fcvtl2 v2.4s, v1.8h
+; CHECK-CVT-SD-NEXT: fcvtl2 v3.4s, v0.8h
+; CHECK-CVT-SD-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-SD-NEXT: fcmge v4.4s, v3.4s, v2.4s
+; CHECK-CVT-SD-NEXT: fcmgt v2.4s, v2.4s, v3.4s
+; CHECK-CVT-SD-NEXT: fcmge v3.4s, v0.4s, v1.4s
+; CHECK-CVT-SD-NEXT: fcmgt v0.4s, v1.4s, v0.4s
+; CHECK-CVT-SD-NEXT: orr v1.16b, v2.16b, v4.16b
+; CHECK-CVT-SD-NEXT: orr v0.16b, v0.16b, v3.16b
+; CHECK-CVT-SD-NEXT: uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-CVT-SD-NEXT: xtn v0.8b, v0.8h
+; CHECK-CVT-SD-NEXT: ret
;
; CHECK-FP16-LABEL: test_fcmp_ord:
; CHECK-FP16: // %bb.0:
@@ -853,8 +1458,27 @@ define <8 x i1> @test_fcmp_ord(<8 x half> %a, <8 x half> %b) #0 {
; CHECK-FP16-NEXT: orr v0.16b, v0.16b, v2.16b
; CHECK-FP16-NEXT: xtn v0.8b, v0.8h
; CHECK-FP16-NEXT: ret
+;
+; CHECK-CVT-GI-LABEL: test_fcmp_ord:
+; CHECK-CVT-GI: // %bb.0:
+; CHECK-CVT-GI-NEXT: fcvtl v2.4s, v0.4h
+; CHECK-CVT-GI-NEXT: fcvtl v3.4s, v1.4h
+; CHECK-CVT-GI-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-CVT-GI-NEXT: fcvtl2 v1.4s, v1.8h
+; CHECK-CVT-GI-NEXT: fcmge v4.4s, v2.4s, v3.4s
+; CHECK-CVT-GI-NEXT: fcmgt v2.4s, v3.4s, v2.4s
+; CHECK-CVT-GI-NEXT: fcmge v3.4s, v0.4s, v1.4s
+; CHECK-CVT-GI-NEXT: fcmgt v0.4s, v1.4s, v0.4s
+; CHECK-CVT-GI-NEXT: orr v1.16b, v2.16b, v4.16b
+; CHECK-CVT-GI-NEXT: orr v0.16b, v0.16b, v3.16b
+; CHECK-CVT-GI-NEXT: uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-CVT-GI-NEXT: xtn v0.8b, v0.8h
+; CHECK-CVT-GI-NEXT: ret
%1 = fcmp ord <8 x half> %a, %b
ret <8 x i1> %1
}
attributes #0 = { nounwind }
+
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-CVT: {{.*}}
diff --git a/llvm/test/CodeGen/AArch64/strict-fp-opt.ll b/llvm/test/CodeGen/AArch64/strict-fp-opt.ll
index bb7cd22c01b41..c433291ff576a 100644
--- a/llvm/test/CodeGen/AArch64/strict-fp-opt.ll
+++ b/llvm/test/CodeGen/AArch64/strict-fp-opt.ll
@@ -1,31 +1,40 @@
-; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s
-; RUN: llc -mtriple=aarch64 -global-isel=true -global-isel-abort=2 %s -o - | FileCheck %s
-
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64 -global-isel=true -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+
+; CHECK-GI: warning: Instruction selection used fallback path for unused_div_fpexcept_strict
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for unused_div_round_dynamic
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for add_twice_fpexcept_strict
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for add_twice_round_dynamic
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for set_rounding
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for set_rounding_fpexcept_strict
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for set_rounding_round_dynamic
; Div whose result is unused should be removed unless we have strict exceptions
-; CHECK-LABEL: unused_div:
-; CHECK-NOT: fdiv
-; CHECK: ret
define void @unused_div(float %x, float %y) {
+; CHECK-LABEL: unused_div:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ret
entry:
%add = fdiv float %x, %y
ret void
}
-; CHECK-LABEL: unused_div_fpexcept_strict:
-; CHECK: fdiv s0, s0, s1
-; CHECK-NEXT: ret
define void @unused_div_fpexcept_strict(float %x, float %y) #0 {
+; CHECK-LABEL: unused_div_fpexcept_strict:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fdiv s0, s0, s1
+; CHECK-NEXT: ret
entry:
%add = call float @llvm.experimental.constrained.fdiv.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
ret void
}
-; CHECK-LABEL: unused_div_round_dynamic:
-; CHECK-NOT: fdiv
-; CHECK: ret
define void @unused_div_round_dynamic(float %x, float %y) #0 {
+; CHECK-LABEL: unused_div_round_dynamic:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ret
entry:
%add = call float @llvm.experimental.constrained.fdiv.f32(float %x, float %y, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0
ret void
@@ -33,14 +42,14 @@ entry:
; Machine CSE should eliminate the second add unless we have strict exceptions
-
-; CHECK-LABEL: add_twice:
-; CHECK: fadd [[ADD:s[0-9]+]], s0, s1
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: fmul [[MUL:s[0-9]+]], [[ADD]], [[ADD]]
-; CHECK-NEXT: fcsel s0, [[ADD]], [[MUL]], eq
-; CHECK-NEXT: ret
define float @add_twice(float %x, float %y, i32 %n) {
+; CHECK-LABEL: add_twice:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fadd s0, s0, s1
+; CHECK-NEXT: cmp w0, #0
+; CHECK-NEXT: fmul s1, s0, s0
+; CHECK-NEXT: fcsel s0, s0, s1, eq
+; CHECK-NEXT: ret
entry:
%add = fadd float %x, %y
%tobool.not = icmp eq i32 %n, 0
@@ -56,15 +65,17 @@ if.end:
ret float %a.0
}
-; CHECK-LABEL: add_twice_fpexcept_strict:
-; CHECK: fmov [[X:s[0-9]+]], s0
-; CHECK-NEXT: fadd s0, s0, s1
-; CHECK-NEXT: cbz w0, [[LABEL:.LBB[0-9_]+]]
-; CHECK: fadd [[ADD:s[0-9]+]], [[X]], s1
-; CHECK-NEXT: fmul s0, s0, [[ADD]]
-; CHECK: [[LABEL]]:
-; CHECK-NEXT: ret
define float @add_twice_fpexcept_strict(float %x, float %y, i32 %n) #0 {
+; CHECK-LABEL: add_twice_fpexcept_strict:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov s2, s0
+; CHECK-NEXT: fadd s0, s0, s1
+; CHECK-NEXT: cbz w0, .LBB4_2
+; CHECK-NEXT: // %bb.1: // %if.then
+; CHECK-NEXT: fadd s1, s2, s1
+; CHECK-NEXT: fmul s0, s0, s1
+; CHECK-NEXT: .LBB4_2: // %if.end
+; CHECK-NEXT: ret
entry:
%add = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
%tobool.not = icmp eq i32 %n, 0
@@ -80,14 +91,15 @@ if.end:
ret float %a.0
}
-; CHECK-LABEL: add_twice_round_dynamic:
-; CHECK: fadd s0, s0, s1
-; CHECK-NEXT: cbz w0, [[LABEL:.LBB[0-9_]+]]
-; CHECK-NOT: fadd
-; CHECK: fmul s0, s0, s0
-; CHECK: [[LABEL]]:
-; CHECK-NEXT: ret
define float @add_twice_round_dynamic(float %x, float %y, i32 %n) #0 {
+; CHECK-LABEL: add_twice_round_dynamic:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fadd s0, s0, s1
+; CHECK-NEXT: cbz w0, .LBB5_2
+; CHECK-NEXT: // %bb.1: // %if.then
+; CHECK-NEXT: fmul s0, s0, s0
+; CHECK-NEXT: .LBB5_2: // %if.end
+; CHECK-NEXT: ret
entry:
%add = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0
%tobool.not = icmp eq i32 %n, 0
@@ -108,17 +120,18 @@ if.end:
; dynamic (as they may give different results) or when we have strict exceptions
; (the llvm.set.rounding is irrelevant, but both could trap).
-; CHECK-LABEL: set_rounding:
-; CHECK-DAG: fadd [[SREG:s[0-9]+]], s0, s1
-; CHECK-DAG: mrs [[XREG1:x[0-9]+]], FPCR
-; CHECK-DAG: orr [[XREG2:x[0-9]+]], [[XREG1]], #0xc00000
-; CHECK: msr FPCR, [[XREG2]]
-; CHECK-NEXT: mrs [[XREG3:x[0-9]+]], FPCR
-; CHECK-NEXT: and [[XREG4:x[0-9]+]], [[XREG3]], #0xffffffffff3fffff
-; CHECK-NEXT: msr FPCR, [[XREG4]]
-; CHECK-NEXT: fsub s0, [[SREG]], [[SREG]]
-; CHECK-NEXT: ret
define float @set_rounding(float %x, float %y) {
+; CHECK-LABEL: set_rounding:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mrs x8, FPCR
+; CHECK-NEXT: fadd s0, s0, s1
+; CHECK-NEXT: orr x8, x8, #0xc00000
+; CHECK-NEXT: msr FPCR, x8
+; CHECK-NEXT: mrs x8, FPCR
+; CHECK-NEXT: and x8, x8, #0xffffffffff3fffff
+; CHECK-NEXT: msr FPCR, x8
+; CHECK-NEXT: fsub s0, s0, s0
+; CHECK-NEXT: ret
entry:
%add1 = fadd float %x, %y
call void @llvm.set.rounding(i32 0)
@@ -128,18 +141,19 @@ entry:
ret float %sub
}
-; CHECK-LABEL: set_rounding_fpexcept_strict:
-; CHECK-DAG: fadd [[SREG1:s[0-9]+]], s0, s1
-; CHECK-DAG: mrs [[XREG1:x[0-9]+]], FPCR
-; CHECK-DAG: orr [[XREG2:x[0-9]+]], [[XREG1]], #0xc00000
-; CHECK: msr FPCR, [[XREG2]]
-; CHECK-DAG: fadd [[SREG2:s[0-9]+]], s0, s1
-; CHECK-DAG: mrs [[XREG3:x[0-9]+]], FPCR
-; CHECK-DAG: and [[XREG4:x[0-9]+]], [[XREG3]], #0xffffffffff3fffff
-; CHECK-NEXT: msr FPCR, [[XREG4]]
-; CHECK-NEXT: fsub s0, [[SREG1]], [[SREG2]]
-; CHECK-NEXT: ret
define float @set_rounding_fpexcept_strict(float %x, float %y) #0 {
+; CHECK-LABEL: set_rounding_fpexcept_strict:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fadd s2, s0, s1
+; CHECK-NEXT: mrs x8, FPCR
+; CHECK-NEXT: orr x8, x8, #0xc00000
+; CHECK-NEXT: msr FPCR, x8
+; CHECK-NEXT: fadd s0, s0, s1
+; CHECK-NEXT: mrs x8, FPCR
+; CHECK-NEXT: and x8, x8, #0xffffffffff3fffff
+; CHECK-NEXT: msr FPCR, x8
+; CHECK-NEXT: fsub s0, s2, s0
+; CHECK-NEXT: ret
entry:
%add1 = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
call void @llvm.set.rounding(i32 0) #0
@@ -149,18 +163,19 @@ entry:
ret float %sub
}
-; CHECK-LABEL: set_rounding_round_dynamic:
-; CHECK-DAG: fadd [[SREG1:s[0-9]+]], s0, s1
-; CHECK-DAG: mrs [[XREG1:x[0-9]+]], FPCR
-; CHECK-DAG: orr [[XREG2:x[0-9]+]], [[XREG1]], #0xc00000
-; CHECK: msr FPCR, [[XREG2]]
-; CHECK-DAG: fadd [[SREG2:s[0-9]+]], s0, s1
-; CHECK-DAG: mrs [[XREG3:x[0-9]+]], FPCR
-; CHECK-DAG: and [[XREG4:x[0-9]+]], [[XREG3]], #0xffffffffff3fffff
-; CHECK-NEXT: msr FPCR, [[XREG4]]
-; CHECK-NEXT: fsub s0, [[SREG1]], [[SREG2]]
-; CHECK-NEXT: ret
define float @set_rounding_round_dynamic(float %x, float %y) #0 {
+; CHECK-LABEL: set_rounding_round_dynamic:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mrs x8, FPCR
+; CHECK-NEXT: fadd s2, s0, s1
+; CHECK-NEXT: orr x8, x8, #0xc00000
+; CHECK-NEXT: msr FPCR, x8
+; CHECK-NEXT: fadd s0, s0, s1
+; CHECK-NEXT: mrs x8, FPCR
+; CHECK-NEXT: and x8, x8, #0xffffffffff3fffff
+; CHECK-NEXT: msr FPCR, x8
+; CHECK-NEXT: fsub s0, s2, s0
+; CHECK-NEXT: ret
entry:
%add1 = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0
call void @llvm.set.rounding(i32 0) #0
@@ -178,3 +193,6 @@ declare i32 @llvm.get.rounding()
declare void @llvm.set.rounding(i32)
attributes #0 = { strictfp }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-GI: {{.*}}
+; CHECK-SD: {{.*}}
More information about the llvm-commits
mailing list