[llvm] [AArch64][GlobalISel] Fix vector lrint/llrint fallbacks (PR #170814)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 5 00:43:04 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
@llvm/pr-subscribers-llvm-globalisel
Author: ayank227
<details>
<summary>Changes</summary>
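Add `.lower()` to the AArch64 GlobalISel legalizer rules for G_LROUND/G_INTRINSIC_LRINT and G_LLROUND/G_INTRINSIC_LLRINT so that vector lrint/llrint are lowered by GlobalISel instead of falling back to SelectionDAG.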
---
Patch is 261.82 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/170814.diff
4 Files Affected:
- (modified) llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp (+4-2)
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir (+8-8)
- (modified) llvm/test/CodeGen/AArch64/vector-llrint.ll (+1301-712)
- (modified) llvm/test/CodeGen/AArch64/vector-lrint.ll (+2502-1473)
``````````diff
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 44a148940ec96..112f3c2b9634a 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -450,13 +450,15 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.legalFor({{s32, s32}, {s32, s64}, {s64, s32}, {s64, s64}})
.legalFor(HasFP16, {{s32, s16}, {s64, s16}})
.minScalar(1, s32)
- .libcallFor({{s64, s128}});
+ .libcallFor({{s64, s128}})
+ .lower();
getActionDefinitionsBuilder({G_LLROUND, G_INTRINSIC_LLRINT})
.legalFor({{s64, s32}, {s64, s64}})
.legalFor(HasFP16, {{s64, s16}})
.minScalar(0, s64)
.minScalar(1, s32)
- .libcallFor({{s64, s128}});
+ .libcallFor({{s64, s128}})
+ .lower();
// TODO: Custom legalization for mismatched types.
getActionDefinitionsBuilder(G_FCOPYSIGN)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
index af72ffbcfadce..8a0071c9ea5c1 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
@@ -188,12 +188,12 @@
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
# DEBUG-NEXT: G_INTRINSIC_LRINT (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
-# DEBUG-NEXT: .. the first uncovered type index: 2, OK
-# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
+# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: G_INTRINSIC_LLRINT (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
-# DEBUG-NEXT: .. the first uncovered type index: 2, OK
-# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
+# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: G_INTRINSIC_ROUNDEVEN (opcode {{[0-9]+}}): 1 type index, 0 imm indices
# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
# DEBUG-NEXT: .. the first uncovered type index: 1, OK
@@ -696,11 +696,11 @@
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: G_LROUND (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
-# DEBUG-NEXT: .. the first uncovered type index: 2, OK
-# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
+# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: G_LLROUND (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
-# DEBUG-NEXT: .. the first uncovered type index: 2, OK
-# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
+# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: G_BR (opcode {{[0-9]+}}): 0 type indices, 0 imm indices
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
diff --git a/llvm/test/CodeGen/AArch64/vector-llrint.ll b/llvm/test/CodeGen/AArch64/vector-llrint.ll
index 45c9e4c9c7194..4e86832a5dffa 100644
--- a/llvm/test/CodeGen/AArch64/vector-llrint.ll
+++ b/llvm/test/CodeGen/AArch64/vector-llrint.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=aarch64 -mattr=+neon | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64 -mattr=+neon | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc < %s -mtriple=aarch64 -mattr=+neon -global-isel -global-isel-abort=2 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) nounwind {
; CHECK-LABEL: llrint_v1i64_v1f16:
@@ -15,369 +16,530 @@ define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) nounwind {
declare <1 x i64> @llvm.llrint.v1i64.v1f16(<1 x half>)
define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) nounwind {
-; CHECK-LABEL: llrint_v1i64_v2f16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov h1, v0.h[1]
-; CHECK-NEXT: fcvt s0, h0
-; CHECK-NEXT: fcvt s1, h1
-; CHECK-NEXT: frintx s0, s0
-; CHECK-NEXT: frintx s1, s1
-; CHECK-NEXT: fcvtzs x8, s0
-; CHECK-NEXT: fcvtzs x9, s1
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: mov v0.d[1], x9
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: llrint_v1i64_v2f16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: mov h1, v0.h[1]
+; CHECK-SD-NEXT: fcvt s0, h0
+; CHECK-SD-NEXT: fcvt s1, h1
+; CHECK-SD-NEXT: frintx s0, s0
+; CHECK-SD-NEXT: frintx s1, s1
+; CHECK-SD-NEXT: fcvtzs x8, s0
+; CHECK-SD-NEXT: fcvtzs x9, s1
+; CHECK-SD-NEXT: fmov d0, x8
+; CHECK-SD-NEXT: mov v0.d[1], x9
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: llrint_v1i64_v2f16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-GI-NEXT: frintx v0.2s, v0.2s
+; CHECK-GI-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-GI-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-GI-NEXT: fcvtl v0.2d, v0.2s
+; CHECK-GI-NEXT: fcvtzs v0.2d, v0.2d
+; CHECK-GI-NEXT: ret
%a = call <2 x i64> @llvm.llrint.v2i64.v2f16(<2 x half> %x)
ret <2 x i64> %a
}
declare <2 x i64> @llvm.llrint.v2i64.v2f16(<2 x half>)
define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) nounwind {
-; CHECK-LABEL: llrint_v4i64_v4f16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov h1, v0.h[2]
-; CHECK-NEXT: mov h2, v0.h[1]
-; CHECK-NEXT: mov h3, v0.h[3]
-; CHECK-NEXT: fcvt s0, h0
-; CHECK-NEXT: fcvt s1, h1
-; CHECK-NEXT: fcvt s2, h2
-; CHECK-NEXT: fcvt s3, h3
-; CHECK-NEXT: frintx s0, s0
-; CHECK-NEXT: frintx s1, s1
-; CHECK-NEXT: frintx s2, s2
-; CHECK-NEXT: frintx s3, s3
-; CHECK-NEXT: fcvtzs x8, s0
-; CHECK-NEXT: fcvtzs x9, s1
-; CHECK-NEXT: fcvtzs x10, s2
-; CHECK-NEXT: fcvtzs x11, s3
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: fmov d1, x9
-; CHECK-NEXT: mov v0.d[1], x10
-; CHECK-NEXT: mov v1.d[1], x11
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: llrint_v4i64_v4f16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: mov h1, v0.h[2]
+; CHECK-SD-NEXT: mov h2, v0.h[1]
+; CHECK-SD-NEXT: mov h3, v0.h[3]
+; CHECK-SD-NEXT: fcvt s0, h0
+; CHECK-SD-NEXT: fcvt s1, h1
+; CHECK-SD-NEXT: fcvt s2, h2
+; CHECK-SD-NEXT: fcvt s3, h3
+; CHECK-SD-NEXT: frintx s0, s0
+; CHECK-SD-NEXT: frintx s1, s1
+; CHECK-SD-NEXT: frintx s2, s2
+; CHECK-SD-NEXT: frintx s3, s3
+; CHECK-SD-NEXT: fcvtzs x8, s0
+; CHECK-SD-NEXT: fcvtzs x9, s1
+; CHECK-SD-NEXT: fcvtzs x10, s2
+; CHECK-SD-NEXT: fcvtzs x11, s3
+; CHECK-SD-NEXT: fmov d0, x8
+; CHECK-SD-NEXT: fmov d1, x9
+; CHECK-SD-NEXT: mov v0.d[1], x10
+; CHECK-SD-NEXT: mov v1.d[1], x11
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: llrint_v4i64_v4f16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-GI-NEXT: frintx v0.4s, v0.4s
+; CHECK-GI-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-GI-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-GI-NEXT: fcvtl v1.2d, v0.2s
+; CHECK-GI-NEXT: fcvtl2 v2.2d, v0.4s
+; CHECK-GI-NEXT: fcvtzs v0.2d, v1.2d
+; CHECK-GI-NEXT: fcvtzs v1.2d, v2.2d
+; CHECK-GI-NEXT: ret
%a = call <4 x i64> @llvm.llrint.v4i64.v4f16(<4 x half> %x)
ret <4 x i64> %a
}
declare <4 x i64> @llvm.llrint.v4i64.v4f16(<4 x half>)
define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) nounwind {
-; CHECK-LABEL: llrint_v8i64_v8f16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: mov h4, v0.h[2]
-; CHECK-NEXT: mov h3, v0.h[1]
-; CHECK-NEXT: mov h7, v0.h[3]
-; CHECK-NEXT: fcvt s0, h0
-; CHECK-NEXT: mov h2, v1.h[2]
-; CHECK-NEXT: mov h5, v1.h[1]
-; CHECK-NEXT: mov h6, v1.h[3]
-; CHECK-NEXT: fcvt s1, h1
-; CHECK-NEXT: fcvt s4, h4
-; CHECK-NEXT: fcvt s3, h3
-; CHECK-NEXT: fcvt s7, h7
-; CHECK-NEXT: frintx s0, s0
-; CHECK-NEXT: fcvt s2, h2
-; CHECK-NEXT: fcvt s5, h5
-; CHECK-NEXT: fcvt s6, h6
-; CHECK-NEXT: frintx s1, s1
-; CHECK-NEXT: frintx s4, s4
-; CHECK-NEXT: frintx s3, s3
-; CHECK-NEXT: frintx s7, s7
-; CHECK-NEXT: fcvtzs x9, s0
-; CHECK-NEXT: frintx s2, s2
-; CHECK-NEXT: frintx s5, s5
-; CHECK-NEXT: frintx s6, s6
-; CHECK-NEXT: fcvtzs x8, s1
-; CHECK-NEXT: fcvtzs x12, s4
-; CHECK-NEXT: fcvtzs x11, s3
-; CHECK-NEXT: fcvtzs x15, s7
-; CHECK-NEXT: fmov d0, x9
-; CHECK-NEXT: fcvtzs x10, s2
-; CHECK-NEXT: fcvtzs x13, s5
-; CHECK-NEXT: fcvtzs x14, s6
-; CHECK-NEXT: fmov d2, x8
-; CHECK-NEXT: fmov d1, x12
-; CHECK-NEXT: mov v0.d[1], x11
-; CHECK-NEXT: fmov d3, x10
-; CHECK-NEXT: mov v2.d[1], x13
-; CHECK-NEXT: mov v1.d[1], x15
-; CHECK-NEXT: mov v3.d[1], x14
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: llrint_v8i64_v8f16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT: mov h4, v0.h[2]
+; CHECK-SD-NEXT: mov h3, v0.h[1]
+; CHECK-SD-NEXT: mov h7, v0.h[3]
+; CHECK-SD-NEXT: fcvt s0, h0
+; CHECK-SD-NEXT: mov h2, v1.h[2]
+; CHECK-SD-NEXT: mov h5, v1.h[1]
+; CHECK-SD-NEXT: mov h6, v1.h[3]
+; CHECK-SD-NEXT: fcvt s1, h1
+; CHECK-SD-NEXT: fcvt s4, h4
+; CHECK-SD-NEXT: fcvt s3, h3
+; CHECK-SD-NEXT: fcvt s7, h7
+; CHECK-SD-NEXT: frintx s0, s0
+; CHECK-SD-NEXT: fcvt s2, h2
+; CHECK-SD-NEXT: fcvt s5, h5
+; CHECK-SD-NEXT: fcvt s6, h6
+; CHECK-SD-NEXT: frintx s1, s1
+; CHECK-SD-NEXT: frintx s4, s4
+; CHECK-SD-NEXT: frintx s3, s3
+; CHECK-SD-NEXT: frintx s7, s7
+; CHECK-SD-NEXT: fcvtzs x9, s0
+; CHECK-SD-NEXT: frintx s2, s2
+; CHECK-SD-NEXT: frintx s5, s5
+; CHECK-SD-NEXT: frintx s6, s6
+; CHECK-SD-NEXT: fcvtzs x8, s1
+; CHECK-SD-NEXT: fcvtzs x12, s4
+; CHECK-SD-NEXT: fcvtzs x11, s3
+; CHECK-SD-NEXT: fcvtzs x15, s7
+; CHECK-SD-NEXT: fmov d0, x9
+; CHECK-SD-NEXT: fcvtzs x10, s2
+; CHECK-SD-NEXT: fcvtzs x13, s5
+; CHECK-SD-NEXT: fcvtzs x14, s6
+; CHECK-SD-NEXT: fmov d2, x8
+; CHECK-SD-NEXT: fmov d1, x12
+; CHECK-SD-NEXT: mov v0.d[1], x11
+; CHECK-SD-NEXT: fmov d3, x10
+; CHECK-SD-NEXT: mov v2.d[1], x13
+; CHECK-SD-NEXT: mov v1.d[1], x15
+; CHECK-SD-NEXT: mov v3.d[1], x14
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: llrint_v8i64_v8f16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: fcvtl v1.4s, v0.4h
+; CHECK-GI-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-GI-NEXT: frintx v1.4s, v1.4s
+; CHECK-GI-NEXT: frintx v0.4s, v0.4s
+; CHECK-GI-NEXT: fcvtn v1.4h, v1.4s
+; CHECK-GI-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-GI-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-GI-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-GI-NEXT: fcvtl v2.2d, v1.2s
+; CHECK-GI-NEXT: fcvtl2 v1.2d, v1.4s
+; CHECK-GI-NEXT: fcvtl v3.2d, v0.2s
+; CHECK-GI-NEXT: fcvtl2 v4.2d, v0.4s
+; CHECK-GI-NEXT: fcvtzs v0.2d, v2.2d
+; CHECK-GI-NEXT: fcvtzs v1.2d, v1.2d
+; CHECK-GI-NEXT: fcvtzs v2.2d, v3.2d
+; CHECK-GI-NEXT: fcvtzs v3.2d, v4.2d
+; CHECK-GI-NEXT: ret
%a = call <8 x i64> @llvm.llrint.v8i64.v8f16(<8 x half> %x)
ret <8 x i64> %a
}
declare <8 x i64> @llvm.llrint.v8i64.v8f16(<8 x half>)
define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) nounwind {
-; CHECK-LABEL: llrint_v16i64_v16f16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ext v2.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: ext v3.16b, v1.16b, v1.16b, #8
-; CHECK-NEXT: mov h17, v0.h[1]
-; CHECK-NEXT: mov h19, v0.h[2]
-; CHECK-NEXT: fcvt s18, h0
-; CHECK-NEXT: mov h0, v0.h[3]
-; CHECK-NEXT: mov h4, v2.h[1]
-; CHECK-NEXT: mov h5, v2.h[2]
-; CHECK-NEXT: fcvt s7, h3
-; CHECK-NEXT: fcvt s6, h2
-; CHECK-NEXT: mov h16, v3.h[2]
-; CHECK-NEXT: mov h2, v2.h[3]
-; CHECK-NEXT: fcvt s17, h17
-; CHECK-NEXT: fcvt s19, h19
-; CHECK-NEXT: frintx s18, s18
-; CHECK-NEXT: fcvt s0, h0
-; CHECK-NEXT: fcvt s4, h4
-; CHECK-NEXT: fcvt s5, h5
-; CHECK-NEXT: frintx s7, s7
-; CHECK-NEXT: frintx s6, s6
-; CHECK-NEXT: fcvt s16, h16
-; CHECK-NEXT: fcvt s2, h2
-; CHECK-NEXT: frintx s17, s17
-; CHECK-NEXT: frintx s19, s19
-; CHECK-NEXT: fcvtzs x13, s18
-; CHECK-NEXT: frintx s0, s0
-; CHECK-NEXT: frintx s4, s4
-; CHECK-NEXT: frintx s5, s5
-; CHECK-NEXT: fcvtzs x9, s7
-; CHECK-NEXT: mov h7, v1.h[2]
-; CHECK-NEXT: fcvtzs x8, s6
-; CHECK-NEXT: mov h6, v1.h[1]
-; CHECK-NEXT: frintx s16, s16
-; CHECK-NEXT: fcvtzs x14, s17
-; CHECK-NEXT: fcvtzs x15, s19
-; CHECK-NEXT: fcvtzs x10, s4
-; CHECK-NEXT: mov h4, v3.h[1]
-; CHECK-NEXT: fcvtzs x11, s5
-; CHECK-NEXT: mov h5, v1.h[3]
-; CHECK-NEXT: mov h3, v3.h[3]
-; CHECK-NEXT: fcvt s1, h1
-; CHECK-NEXT: fcvt s7, h7
-; CHECK-NEXT: fcvt s6, h6
-; CHECK-NEXT: fcvtzs x12, s16
-; CHECK-NEXT: frintx s16, s2
-; CHECK-NEXT: fmov d2, x8
-; CHECK-NEXT: fcvt s4, h4
-; CHECK-NEXT: fcvt s3, h3
-; CHECK-NEXT: fcvt s5, h5
-; CHECK-NEXT: frintx s1, s1
-; CHECK-NEXT: frintx s7, s7
-; CHECK-NEXT: frintx s17, s6
-; CHECK-NEXT: fmov d6, x9
-; CHECK-NEXT: mov v2.d[1], x10
-; CHECK-NEXT: frintx s4, s4
-; CHECK-NEXT: frintx s18, s3
-; CHECK-NEXT: frintx s5, s5
-; CHECK-NEXT: fcvtzs x8, s1
-; CHECK-NEXT: fcvtzs x9, s7
-; CHECK-NEXT: fmov d3, x11
-; CHECK-NEXT: fcvtzs x11, s0
-; CHECK-NEXT: fmov d7, x12
-; CHECK-NEXT: fcvtzs x12, s16
-; CHECK-NEXT: fcvtzs x16, s17
-; CHECK-NEXT: fcvtzs x17, s4
-; CHECK-NEXT: fmov d0, x13
-; CHECK-NEXT: fmov d1, x15
-; CHECK-NEXT: fcvtzs x18, s18
-; CHECK-NEXT: fcvtzs x0, s5
-; CHECK-NEXT: fmov d4, x8
-; CHECK-NEXT: fmov d5, x9
-; CHECK-NEXT: mov v0.d[1], x14
-; CHECK-NEXT: mov v1.d[1], x11
-; CHECK-NEXT: mov v3.d[1], x12
-; CHECK-NEXT: mov v4.d[1], x16
-; CHECK-NEXT: mov v6.d[1], x17
-; CHECK-NEXT: mov v7.d[1], x18
-; CHECK-NEXT: mov v5.d[1], x0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: llrint_v16i64_v16f16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ext v2.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT: ext v3.16b, v1.16b, v1.16b, #8
+; CHECK-SD-NEXT: mov h17, v0.h[1]
+; CHECK-SD-NEXT: mov h19, v0.h[2]
+; CHECK-SD-NEXT: fcvt s18, h0
+; CHECK-SD-NEXT: mov h0, v0.h[3]
+; CHECK-SD-NEXT: mov h4, v2.h[1]
+; CHECK-SD-NEXT: mov h5, v2.h[2]
+; CHECK-SD-NEXT: fcvt s7, h3
+; CHECK-SD-NEXT: fcvt s6, h2
+; CHECK-SD-NEXT: mov h16, v3.h[2]
+; CHECK-SD-NEXT: mov h2, v2.h[3]
+; CHECK-SD-NEXT: fcvt s17, h17
+; CHECK-SD-NEXT: fcvt s19, h19
+; CHECK-SD-NEXT: frintx s18, s18
+; CHECK-SD-NEXT: fcvt s0, h0
+; CHECK-SD-NEXT: fcvt s4, h4
+; CHECK-SD-NEXT: fcvt s5, h5
+; CHECK-SD-NEXT: frintx s7, s7
+; CHECK-SD-NEXT: frintx s6, s6
+; CHECK-SD-NEXT: fcvt s16, h16
+; CHECK-SD-NEXT: fcvt s2, h2
+; CHECK-SD-NEXT: frintx s17, s17
+; CHECK-SD-NEXT: frintx s19, s19
+; CHECK-SD-NEXT: fcvtzs x13, s18
+; CHECK-SD-NEXT: frintx s0, s0
+; CHECK-SD-NEXT: frintx s4, s4
+; CHECK-SD-NEXT: frintx s5, s5
+; CHECK-SD-NEXT: fcvtzs x9, s7
+; CHECK-SD-NEXT: mov h7, v1.h[2]
+; CHECK-SD-NEXT: fcvtzs x8, s6
+; CHECK-SD-NEXT: mov h6, v1.h[1]
+; CHECK-SD-NEXT: frintx s16, s16
+; CHECK-SD-NEXT: fcvtzs x14, s17
+; CHECK-SD-NEXT: fcvtzs x15, s19
+; CHECK-SD-NEXT: fcvtzs x10, s4
+; CHECK-SD-NEXT: mov h4, v3.h[1]
+; CHECK-SD-NEXT: fcvtzs x11, s5
+; CHECK-SD-NEXT: mov h5, v1.h[3]
+; CHECK-SD-NEXT: mov h3, v3.h[3]
+; CHECK-SD-NEXT: fcvt s1, h1
+; CHECK-SD-NEXT: fcvt s7, h7
+; CHECK-SD-NEXT: fcvt s6, h6
+; CHECK-SD-NEXT: fcvtzs x12, s16
+; CHECK-SD-NEXT: frintx s16, s2
+; CHECK-SD-NEXT: fmov d2, x8
+; CHECK-SD-NEXT: fcvt s4, h4
+; CHECK-SD-NEXT: fcvt s3, h3
+; CHECK-SD-NEXT: fcvt s5, h5
+; CHECK-SD-NEXT: frintx s1, s1
+; CHECK-SD-NEXT: frintx s7, s7
+; CHECK-SD-NEXT: frintx s17, s6
+; CHECK-SD-NEXT: fmov d6, x9
+; CHECK-SD-NEXT: mov v2.d[1], x10
+; CHECK-SD-NEXT: frintx s4, s4
+; CHECK-SD-NEXT: frintx s18, s3
+; CHECK-SD-NEXT: frintx s5, s5
+; CHECK-SD-NEXT: fcvtzs x8, s1
+; CHECK-SD-NEXT: fcvtzs x9, s7
+; CHECK-SD-NEXT: fmov d3, x11
+; CHECK-SD-NEXT: fcvtzs x11, s0
+; CHECK-SD-NEXT: fmov d7, x12
+; CHECK-SD-NEXT: fcvtzs x12, s16
+; CHECK-SD-NEXT: fcvtzs x16, s17
+; CHECK-SD-NEXT: fcvtzs x17, s4
+; CHECK-SD-NEXT: fmov d0, x13
+; CHECK-SD-NEXT: fmov d1, x15
+; CHECK-SD-NEXT: fcvtzs x18, s18
+; CHECK-SD-NEXT: fcvtzs x0, s5
+; CHECK-SD-NEXT: fmov d4, x8
+; CHECK-SD-NEXT: fmov d5, x9
+; CHECK-SD-NEXT: mov v0.d[1], x14
+; CHECK-SD-NEXT: mov v1.d[1], x11
+; CHECK-SD-NEXT: mov v3.d[1], x12
+; CHECK-SD-NEXT: mov v4.d[1], x16
+; CHECK-SD-NEXT: mov v6.d[1], x17
+; CHECK-SD-NEXT: mov v7.d[1], x18
+; CHECK-SD-NEXT: mov v5.d[1], x0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: llrint_v16i64_v16f16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: fcvtl v2.4s, v0.4h
+; CHECK-GI-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-GI-NEXT: fcvtl v3.4s, v1.4h
+; CHECK-GI-NEXT: fcvtl2 v1.4s, v1.8h
+; CHECK-GI-NEXT: frintx v2.4s, v2.4s
+; CHECK-GI-NEXT: frintx v0.4s, v0.4s
+; CHECK-GI-NEXT: frintx v3.4s, v3.4s
+; CHECK-GI-NEXT: frintx v1.4s, v1.4s
+; CHECK-GI-NEXT: fcvtn v2.4h, v2.4s
+; CHECK-GI-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-GI-NEXT: fcvtn v3.4h, v3.4s
+; CHECK-GI-NEXT: fcvtn v1.4h, v1.4s
+; CHECK-GI-NEXT: fcvtl v2.4s, v2.4h
+; CHECK-GI-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-GI-NEXT: fcvtl v3.4s, v3.4h
+; CHECK-GI-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-GI-NEXT: fcvtl v4.2d, v2.2s
+; CHECK-GI-NEXT: fcvtl2 v2.2d, v2.4s
+; CHECK-GI-NEXT: fcvtl v5.2d, v0.2s
+; CHECK-GI-NEXT: fcvtl2 v6.2d, v0.4s
+; CHECK-GI-NEXT: fcvtl v7.2d, v3.2s
+; CHECK-GI-NEXT: fcvtl2 v16.2d, v3.4s
+; CHECK-GI-NEXT: fcvtl v17.2d, v1.2s
+; CHECK-GI-NEXT: fcvtl2 v18.2d, v1.4s
+; CHECK-GI-NEXT: fcvtzs v0.2d, v4.2d
+; CHECK-GI-NEXT: fcvtzs v1.2d, v2.2d
+; CHECK-GI-NEXT: fcvtzs v2.2d, v5.2d
+; CHECK-GI-NEXT: fcvtzs v3.2d, v6.2d
+; CHECK-GI-NEXT: fcvtzs v4.2d, v7.2d
+; CHECK-GI-NEXT: fcvtzs v5.2d, v16.2d
+; CHECK-GI-NEXT: fcvtzs v6.2d, v17.2d
+; CHECK-GI-NEXT: fcvtzs v7.2d, v18.2d
+; CHECK-GI-NEXT: ret
%a = call <16 x i64> @llvm.llrint.v16i64.v16f16(<16 x half> %x)
ret <16 x i64> %a
}
declare <16 x i64> @llvm.llrint.v16i64.v16f16(<16 x half>)
define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) nounwind {
-; CHECK-LABEL: llrint_v32i64_v32f16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ext v4.16b, v1.16b, v1.16b, #8
-; CHECK-NEXT: ext v5.16b, v2.16b, v2.16b, #8
-; CHECK-NEXT: ext v6.16b, v3.16b, v3.16b, #8
-; CHECK-NEXT: ext v7.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: mov h19, v0.h[1]
-; CHECK-NEXT: fcvt s21, h0
-; CHECK-NEXT: mov h23, v1.h[2]
-; CHECK-NEXT: fcvt s22, h1
-; CHECK-NEXT: fcvt s26, h2
-; CHECK-NEXT: mov h27, v2.h[1]
-; CHECK-NEXT: mov h28, v2.h[2]
-; CHECK-NEXT: mov h16, v4.h[2]
-; CHECK-NEXT: fcvt s17, h5
-; CHECK-NEXT: mov h18, v5.h[2]
-; CHECK-NEXT: mov h20, v6.h[2]
-; CHECK-NEXT: fcvt s24, h7
-; CHECK-NEXT: fcvt s25, h6
-; CHECK-NEXT: fcvt s19, h19
-; CHECK-NEXT: frintx s22, s22
-; CHECK-NEXT: fcvt s16, h16
-; CHECK-NEXT: frintx s17, s17
-; CHECK-NEXT: fcvt s18, h18
-; CHECK-NEXT: fcvt s20, h20
-; CHECK-NEXT: frintx s16, s16
-; CHECK-NEXT: fcvtzs x12, s17
-; CHECK-NEXT: frintx s17, s18
-; CHECK-NEXT: frintx s18, s21
-; CHECK-NEXT: fcvt s21, h23
-; CHECK-NEXT: frintx s23, s24
-; CHECK-NEXT: frintx s24, s25
-; CHECK-NEXT: frintx s25, s19
-; CHECK-NEXT: mov h19, v7.h[1]
-; CHECK-NEXT: fcvtzs x13, s16
-; CHECK-NEXT: frintx s16, s20
-; CHECK-NEXT: frintx s20, s26
-; CHECK-NEXT: fcvtzs x9, s23
-; CHECK-NE...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/170814
More information about the llvm-commits mailing list