[llvm] [AArch64][GlobalISel] Legalize s16 G_FCONSTANT to avoid widening to G_CONSTANT (PR #161205)
Ryan Cowan via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 6 03:45:24 PDT 2025
https://github.com/HolyMolyCowMan updated https://github.com/llvm/llvm-project/pull/161205
>From d3ea33e1477c5b1a4e9e22aa9915f6ac4aaaeb08 Mon Sep 17 00:00:00 2001
From: Ryan Cowan <ryan.cowan at arm.com>
Date: Mon, 29 Sep 2025 14:17:11 +0000
Subject: [PATCH 1/4] [AArch64][GlobalISel] Legalize s16 G_FCONSTANT to avoid
widening to G_CONSTANT
---
llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 7ee54c5932b15..1593f32d1fc6c 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -678,8 +678,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.widenScalarToNextPow2(0)
.clampScalar(0, s8, s64);
getActionDefinitionsBuilder(G_FCONSTANT)
- .legalFor({s32, s64, s128})
- .legalFor(HasFP16, {s16})
+ // Always legalize s16 to prevent G_FCONSTANT being widened to G_CONSTANT
+ .legalFor({s16, s32, s64, s128})
.clampScalar(0, MinFPScalar, s128);
// FIXME: fix moreElementsToNextPow2
>From cd6518d79507bd714f65b4aed4f2e1109e774b7a Mon Sep 17 00:00:00 2001
From: Ryan Cowan <ryan.cowan at arm.com>
Date: Mon, 29 Sep 2025 15:11:46 +0000
Subject: [PATCH 2/4] Update tests
---
.../AArch64/GlobalISel/legalize-constant.mir | 5 +-
.../GlobalISel/legalize-fp16-fconstant.mir | 6 +-
.../CodeGen/AArch64/arm64-indexed-memory.ll | 13 +-
llvm/test/CodeGen/AArch64/dup.ll | 30 +++--
llvm/test/CodeGen/AArch64/f16-instructions.ll | 21 ++--
llvm/test/CodeGen/AArch64/fcvt-fixed.ll | 112 +++++++++---------
.../CodeGen/AArch64/vecreduce-fadd-strict.ll | 20 ++--
llvm/test/CodeGen/AArch64/vecreduce-fadd.ll | 42 +++----
.../CodeGen/AArch64/vecreduce-fmul-strict.ll | 12 +-
9 files changed, 133 insertions(+), 128 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir
index c301e76852b54..c00ce2242a888 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir
@@ -48,8 +48,9 @@ body: |
; CHECK-NEXT: $w0 = COPY [[C]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 2.000000e+00
; CHECK-NEXT: $x0 = COPY [[C1]](s64)
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK-NEXT: $w0 = COPY [[C2]](s32)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C2]](s16)
+ ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32)
%0:_(s32) = G_FCONSTANT float 1.0
$w0 = COPY %0
%1:_(s64) = G_FCONSTANT double 2.0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp16-fconstant.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp16-fconstant.mir
index ddf219dc4927e..c6df3456a8445 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp16-fconstant.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp16-fconstant.mir
@@ -8,7 +8,7 @@ tracksRegLiveness: true
body: |
bb.0:
; NO-FP16-LABEL: name: fp16
- ; NO-FP16: %cst:_(s16) = G_CONSTANT i16 0
+ ; NO-FP16: %cst:_(s16) = G_FCONSTANT half 0xH0000
; NO-FP16-NEXT: $h0 = COPY %cst(s16)
; NO-FP16-NEXT: RET_ReallyLR implicit $h0
;
@@ -26,7 +26,7 @@ tracksRegLiveness: true
body: |
bb.0:
; NO-FP16-LABEL: name: fp16_non_zero
- ; NO-FP16: %cst:_(s16) = G_CONSTANT i16 16384
+ ; NO-FP16: %cst:_(s16) = G_FCONSTANT half 0xH4000
; NO-FP16-NEXT: $h0 = COPY %cst(s16)
; NO-FP16-NEXT: RET_ReallyLR implicit $h0
;
@@ -44,7 +44,7 @@ tracksRegLiveness: true
body: |
bb.1.entry:
; NO-FP16-LABEL: name: nan
- ; NO-FP16: %cst:_(s16) = G_CONSTANT i16 31745
+ ; NO-FP16: %cst:_(s16) = G_FCONSTANT half 0xH7C01
; NO-FP16-NEXT: %ext:_(s32) = G_FPEXT %cst(s16)
; NO-FP16-NEXT: $w0 = COPY %ext(s32)
; NO-FP16-NEXT: RET_ReallyLR implicit $w0
diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll
index cb5df07c7ede4..322a96aca5db2 100644
--- a/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll
@@ -739,15 +739,14 @@ define ptr @postidx32_shalf(ptr %src, ptr %out, half %a) {
;
; GISEL-LABEL: postidx32_shalf:
; GISEL: ; %bb.0:
-; GISEL-NEXT: mov w8, #0 ; =0x0
-; GISEL-NEXT: ldr h1, [x0], #4
-; GISEL-NEXT: fmov s2, w8
+; GISEL-NEXT: movi d1, #0000000000000000
+; GISEL-NEXT: ldr h2, [x0], #4
; GISEL-NEXT: ; kill: def $h0 killed $h0 def $s0
; GISEL-NEXT: fmov w9, s0
-; GISEL-NEXT: fcvt s3, h1
-; GISEL-NEXT: fmov w8, s1
-; GISEL-NEXT: fcvt s2, h2
-; GISEL-NEXT: fcmp s3, s2
+; GISEL-NEXT: fcvt s3, h2
+; GISEL-NEXT: fmov w8, s2
+; GISEL-NEXT: fcvt s1, h1
+; GISEL-NEXT: fcmp s3, s1
; GISEL-NEXT: csel w8, w8, w9, mi
; GISEL-NEXT: strh w8, [x1]
; GISEL-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/dup.ll b/llvm/test/CodeGen/AArch64/dup.ll
index 079ff1076b110..1c4a6ab2217b0 100644
--- a/llvm/test/CodeGen/AArch64/dup.ll
+++ b/llvm/test/CodeGen/AArch64/dup.ll
@@ -1469,8 +1469,9 @@ define <2 x half> @loaddup_str_v2half(ptr %p) {
; CHECK-GI-LABEL: loaddup_str_v2half:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ldr h0, [x0]
-; CHECK-GI-NEXT: strh wzr, [x0]
+; CHECK-GI-NEXT: movi d1, #0000000000000000
; CHECK-GI-NEXT: dup v0.4h, v0.h[0]
+; CHECK-GI-NEXT: str h1, [x0]
; CHECK-GI-NEXT: ret
entry:
%a = load half, ptr %p
@@ -1526,8 +1527,9 @@ define <3 x half> @loaddup_str_v3half(ptr %p) {
; CHECK-GI-LABEL: loaddup_str_v3half:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ldr h0, [x0]
-; CHECK-GI-NEXT: strh wzr, [x0]
+; CHECK-GI-NEXT: movi d1, #0000000000000000
; CHECK-GI-NEXT: dup v0.4h, v0.h[0]
+; CHECK-GI-NEXT: str h1, [x0]
; CHECK-GI-NEXT: ret
entry:
%a = load half, ptr %p
@@ -1583,8 +1585,9 @@ define <4 x half> @loaddup_str_v4half(ptr %p) {
; CHECK-GI-LABEL: loaddup_str_v4half:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ldr h0, [x0]
-; CHECK-GI-NEXT: strh wzr, [x0]
+; CHECK-GI-NEXT: movi d1, #0000000000000000
; CHECK-GI-NEXT: dup v0.4h, v0.h[0]
+; CHECK-GI-NEXT: str h1, [x0]
; CHECK-GI-NEXT: ret
entry:
%a = load half, ptr %p
@@ -1639,8 +1642,9 @@ define <8 x half> @loaddup_str_v8half(ptr %p) {
; CHECK-GI-LABEL: loaddup_str_v8half:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ldr h0, [x0]
-; CHECK-GI-NEXT: strh wzr, [x0]
+; CHECK-GI-NEXT: movi d1, #0000000000000000
; CHECK-GI-NEXT: dup v0.8h, v0.h[0]
+; CHECK-GI-NEXT: str h1, [x0]
; CHECK-GI-NEXT: ret
entry:
%a = load half, ptr %p
@@ -1713,9 +1717,10 @@ define <16 x half> @loaddup_str_v16half(ptr %p) {
; CHECK-GI-LABEL: loaddup_str_v16half:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ldr h1, [x0]
-; CHECK-GI-NEXT: strh wzr, [x0]
+; CHECK-GI-NEXT: movi d2, #0000000000000000
; CHECK-GI-NEXT: dup v0.8h, v1.h[0]
; CHECK-GI-NEXT: dup v1.8h, v1.h[0]
+; CHECK-GI-NEXT: str h2, [x0]
; CHECK-GI-NEXT: ret
entry:
%a = load half, ptr %p
@@ -1771,8 +1776,9 @@ define <2 x bfloat> @loaddup_str_v2bfloat(ptr %p) {
; CHECK-GI-LABEL: loaddup_str_v2bfloat:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ldr h0, [x0]
-; CHECK-GI-NEXT: strh wzr, [x0]
+; CHECK-GI-NEXT: movi d1, #0000000000000000
; CHECK-GI-NEXT: dup v0.4h, v0.h[0]
+; CHECK-GI-NEXT: str h1, [x0]
; CHECK-GI-NEXT: ret
entry:
%a = load bfloat, ptr %p
@@ -1828,8 +1834,9 @@ define <3 x bfloat> @loaddup_str_v3bfloat(ptr %p) {
; CHECK-GI-LABEL: loaddup_str_v3bfloat:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ldr h0, [x0]
-; CHECK-GI-NEXT: strh wzr, [x0]
+; CHECK-GI-NEXT: movi d1, #0000000000000000
; CHECK-GI-NEXT: dup v0.4h, v0.h[0]
+; CHECK-GI-NEXT: str h1, [x0]
; CHECK-GI-NEXT: ret
entry:
%a = load bfloat, ptr %p
@@ -1885,8 +1892,9 @@ define <4 x bfloat> @loaddup_str_v4bfloat(ptr %p) {
; CHECK-GI-LABEL: loaddup_str_v4bfloat:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ldr h0, [x0]
-; CHECK-GI-NEXT: strh wzr, [x0]
+; CHECK-GI-NEXT: movi d1, #0000000000000000
; CHECK-GI-NEXT: dup v0.4h, v0.h[0]
+; CHECK-GI-NEXT: str h1, [x0]
; CHECK-GI-NEXT: ret
entry:
%a = load bfloat, ptr %p
@@ -1941,8 +1949,9 @@ define <8 x bfloat> @loaddup_str_v8bfloat(ptr %p) {
; CHECK-GI-LABEL: loaddup_str_v8bfloat:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ldr h0, [x0]
-; CHECK-GI-NEXT: strh wzr, [x0]
+; CHECK-GI-NEXT: movi d1, #0000000000000000
; CHECK-GI-NEXT: dup v0.8h, v0.h[0]
+; CHECK-GI-NEXT: str h1, [x0]
; CHECK-GI-NEXT: ret
entry:
%a = load bfloat, ptr %p
@@ -2015,9 +2024,10 @@ define <16 x bfloat> @loaddup_str_v16bfloat(ptr %p) {
; CHECK-GI-LABEL: loaddup_str_v16bfloat:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ldr h1, [x0]
-; CHECK-GI-NEXT: strh wzr, [x0]
+; CHECK-GI-NEXT: movi d2, #0000000000000000
; CHECK-GI-NEXT: dup v0.8h, v1.h[0]
; CHECK-GI-NEXT: dup v1.8h, v1.h[0]
+; CHECK-GI-NEXT: str h2, [x0]
; CHECK-GI-NEXT: ret
entry:
%a = load bfloat, ptr %p
diff --git a/llvm/test/CodeGen/AArch64/f16-instructions.ll b/llvm/test/CodeGen/AArch64/f16-instructions.ll
index adc536da26f26..b234ef7a5ff8b 100644
--- a/llvm/test/CodeGen/AArch64/f16-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/f16-instructions.ll
@@ -782,18 +782,19 @@ define void @test_fccmp(half %in, ptr %out) {
;
; CHECK-CVT-GI-LABEL: test_fccmp:
; CHECK-CVT-GI: // %bb.0:
-; CHECK-CVT-GI-NEXT: mov w8, #17664 // =0x4500
-; CHECK-CVT-GI-NEXT: mov w9, #18432 // =0x4800
+; CHECK-CVT-GI-NEXT: adrp x8, .LCPI29_0
; CHECK-CVT-GI-NEXT: // kill: def $h0 killed $h0 def $s0
; CHECK-CVT-GI-NEXT: fcvt s2, h0
-; CHECK-CVT-GI-NEXT: fmov s1, w8
-; CHECK-CVT-GI-NEXT: fmov s3, w9
-; CHECK-CVT-GI-NEXT: fmov w9, s0
-; CHECK-CVT-GI-NEXT: fcvt s1, h1
-; CHECK-CVT-GI-NEXT: fcvt s3, h3
-; CHECK-CVT-GI-NEXT: fcmp s2, s1
-; CHECK-CVT-GI-NEXT: fccmp s2, s3, #4, mi
-; CHECK-CVT-GI-NEXT: csel w8, w9, w8, gt
+; CHECK-CVT-GI-NEXT: ldr h1, [x8, :lo12:.LCPI29_0]
+; CHECK-CVT-GI-NEXT: adrp x8, .LCPI29_1
+; CHECK-CVT-GI-NEXT: ldr h4, [x8, :lo12:.LCPI29_1]
+; CHECK-CVT-GI-NEXT: fmov w8, s0
+; CHECK-CVT-GI-NEXT: fcvt s3, h1
+; CHECK-CVT-GI-NEXT: fmov w9, s1
+; CHECK-CVT-GI-NEXT: fcvt s4, h4
+; CHECK-CVT-GI-NEXT: fcmp s2, s3
+; CHECK-CVT-GI-NEXT: fccmp s2, s4, #4, mi
+; CHECK-CVT-GI-NEXT: csel w8, w8, w9, gt
; CHECK-CVT-GI-NEXT: strh w8, [x0]
; CHECK-CVT-GI-NEXT: ret
;
diff --git a/llvm/test/CodeGen/AArch64/fcvt-fixed.ll b/llvm/test/CodeGen/AArch64/fcvt-fixed.ll
index 51aad4fe25d3b..7409bfb91454c 100644
--- a/llvm/test/CodeGen/AArch64/fcvt-fixed.ll
+++ b/llvm/test/CodeGen/AArch64/fcvt-fixed.ll
@@ -166,9 +166,9 @@ define i32 @fcvtzs_f16_i32_7(half %flt) {
;
; CHECK-GI-NO16-LABEL: fcvtzs_f16_i32_7:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI8_0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI8_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -206,9 +206,9 @@ define i32 @fcvtzs_f16_i32_15(half %flt) {
;
; CHECK-GI-NO16-LABEL: fcvtzs_f16_i32_15:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI9_0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI9_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -246,9 +246,9 @@ define i64 @fcvtzs_f16_i64_7(half %flt) {
;
; CHECK-GI-NO16-LABEL: fcvtzs_f16_i64_7:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI10_0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI10_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -286,9 +286,9 @@ define i64 @fcvtzs_f16_i64_15(half %flt) {
;
; CHECK-GI-NO16-LABEL: fcvtzs_f16_i64_15:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI11_0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI11_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -470,9 +470,9 @@ define i32 @fcvtzu_f16_i32_7(half %flt) {
;
; CHECK-GI-NO16-LABEL: fcvtzu_f16_i32_7:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI20_0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI20_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -510,9 +510,9 @@ define i32 @fcvtzu_f16_i32_15(half %flt) {
;
; CHECK-GI-NO16-LABEL: fcvtzu_f16_i32_15:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI21_0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI21_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -550,9 +550,9 @@ define i64 @fcvtzu_f16_i64_7(half %flt) {
;
; CHECK-GI-NO16-LABEL: fcvtzu_f16_i64_7:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI22_0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI22_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -590,9 +590,9 @@ define i64 @fcvtzu_f16_i64_15(half %flt) {
;
; CHECK-GI-NO16-LABEL: fcvtzu_f16_i64_15:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI23_0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI23_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -775,10 +775,10 @@ define half @scvtf_f16_i32_7(i32 %int) {
; CHECK-GI-NO16-LABEL: scvtf_f16_i32_7:
; CHECK-GI-NO16: // %bb.0:
; CHECK-GI-NO16-NEXT: scvtf s0, w0
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
-; CHECK-GI-NO16-NEXT: fmov s1, w8
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI32_0
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI32_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
+; CHECK-GI-NO16-NEXT: fcvt h0, s0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -815,10 +815,10 @@ define half @scvtf_f16_i32_15(i32 %int) {
; CHECK-GI-NO16-LABEL: scvtf_f16_i32_15:
; CHECK-GI-NO16: // %bb.0:
; CHECK-GI-NO16-NEXT: scvtf s0, w0
-; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800
-; CHECK-GI-NO16-NEXT: fmov s1, w8
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI33_0
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI33_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
+; CHECK-GI-NO16-NEXT: fcvt h0, s0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -855,10 +855,10 @@ define half @scvtf_f16_i64_7(i64 %long) {
; CHECK-GI-NO16-LABEL: scvtf_f16_i64_7:
; CHECK-GI-NO16: // %bb.0:
; CHECK-GI-NO16-NEXT: scvtf s0, x0
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
-; CHECK-GI-NO16-NEXT: fmov s1, w8
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI34_0
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI34_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
+; CHECK-GI-NO16-NEXT: fcvt h0, s0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -895,10 +895,10 @@ define half @scvtf_f16_i64_15(i64 %long) {
; CHECK-GI-NO16-LABEL: scvtf_f16_i64_15:
; CHECK-GI-NO16: // %bb.0:
; CHECK-GI-NO16-NEXT: scvtf s0, x0
-; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800
-; CHECK-GI-NO16-NEXT: fmov s1, w8
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI35_0
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI35_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
+; CHECK-GI-NO16-NEXT: fcvt h0, s0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -1079,10 +1079,10 @@ define half @ucvtf_f16_i32_7(i32 %int) {
; CHECK-GI-NO16-LABEL: ucvtf_f16_i32_7:
; CHECK-GI-NO16: // %bb.0:
; CHECK-GI-NO16-NEXT: ucvtf s0, w0
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
-; CHECK-GI-NO16-NEXT: fmov s1, w8
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI44_0
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI44_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
+; CHECK-GI-NO16-NEXT: fcvt h0, s0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -1119,10 +1119,10 @@ define half @ucvtf_f16_i32_15(i32 %int) {
; CHECK-GI-NO16-LABEL: ucvtf_f16_i32_15:
; CHECK-GI-NO16: // %bb.0:
; CHECK-GI-NO16-NEXT: ucvtf s0, w0
-; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800
-; CHECK-GI-NO16-NEXT: fmov s1, w8
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI45_0
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI45_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
+; CHECK-GI-NO16-NEXT: fcvt h0, s0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -1159,10 +1159,10 @@ define half @ucvtf_f16_i64_7(i64 %long) {
; CHECK-GI-NO16-LABEL: ucvtf_f16_i64_7:
; CHECK-GI-NO16: // %bb.0:
; CHECK-GI-NO16-NEXT: ucvtf s0, x0
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
-; CHECK-GI-NO16-NEXT: fmov s1, w8
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI46_0
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI46_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
+; CHECK-GI-NO16-NEXT: fcvt h0, s0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -1199,10 +1199,10 @@ define half @ucvtf_f16_i64_15(i64 %long) {
; CHECK-GI-NO16-LABEL: ucvtf_f16_i64_15:
; CHECK-GI-NO16: // %bb.0:
; CHECK-GI-NO16-NEXT: ucvtf s0, x0
-; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800
-; CHECK-GI-NO16-NEXT: fmov s1, w8
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI47_0
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI47_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
+; CHECK-GI-NO16-NEXT: fcvt h0, s0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -1373,9 +1373,9 @@ define i32 @fcvtzs_sat_f16_i32_7(half %dbl) {
;
; CHECK-GI-NO16-LABEL: fcvtzs_sat_f16_i32_7:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI55_0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI55_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -1413,9 +1413,9 @@ define i32 @fcvtzs_sat_f16_i32_15(half %dbl) {
;
; CHECK-GI-NO16-LABEL: fcvtzs_sat_f16_i32_15:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI56_0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI56_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -1453,9 +1453,9 @@ define i64 @fcvtzs_sat_f16_i64_7(half %dbl) {
;
; CHECK-GI-NO16-LABEL: fcvtzs_sat_f16_i64_7:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI57_0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI57_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -1493,9 +1493,9 @@ define i64 @fcvtzs_sat_f16_i64_15(half %dbl) {
;
; CHECK-GI-NO16-LABEL: fcvtzs_sat_f16_i64_15:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI58_0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI58_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -1667,9 +1667,9 @@ define i32 @fcvtzu_sat_f16_i32_7(half %dbl) {
;
; CHECK-GI-NO16-LABEL: fcvtzu_sat_f16_i32_7:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI66_0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI66_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -1707,9 +1707,9 @@ define i32 @fcvtzu_sat_f16_i32_15(half %dbl) {
;
; CHECK-GI-NO16-LABEL: fcvtzu_sat_f16_i32_15:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI67_0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI67_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -1747,9 +1747,9 @@ define i64 @fcvtzu_sat_f16_i64_7(half %dbl) {
;
; CHECK-GI-NO16-LABEL: fcvtzu_sat_f16_i64_7:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI68_0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI68_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -1787,9 +1787,9 @@ define i64 @fcvtzu_sat_f16_i64_15(half %dbl) {
;
; CHECK-GI-NO16-LABEL: fcvtzu_sat_f16_i64_15:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI69_0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI69_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fadd-strict.ll b/llvm/test/CodeGen/AArch64/vecreduce-fadd-strict.ll
index 594a3ab79d73b..be07978cd8516 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fadd-strict.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fadd-strict.ll
@@ -38,10 +38,10 @@ define half @add_v2HalfH(<2 x half> %bin.rdx) {
;
; CHECK-GI-NOFP16-LABEL: add_v2HalfH:
; CHECK-GI-NOFP16: // %bb.0:
-; CHECK-GI-NOFP16-NEXT: mov w8, #32768 // =0x8000
+; CHECK-GI-NOFP16-NEXT: adrp x8, .LCPI1_0
; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NOFP16-NEXT: fcvt s2, h0
-; CHECK-GI-NOFP16-NEXT: fmov s1, w8
+; CHECK-GI-NOFP16-NEXT: ldr h1, [x8, :lo12:.LCPI1_0]
; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[1]
; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
@@ -88,10 +88,10 @@ define half @add_v3HalfH(<3 x half> %bin.rdx) {
;
; CHECK-GI-NOFP16-LABEL: add_v3HalfH:
; CHECK-GI-NOFP16: // %bb.0:
-; CHECK-GI-NOFP16-NEXT: mov w8, #32768 // =0x8000
+; CHECK-GI-NOFP16-NEXT: adrp x8, .LCPI2_0
; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NOFP16-NEXT: fcvt s2, h0
-; CHECK-GI-NOFP16-NEXT: fmov s1, w8
+; CHECK-GI-NOFP16-NEXT: ldr h1, [x8, :lo12:.LCPI2_0]
; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
; CHECK-GI-NOFP16-NEXT: fadd s1, s1, s2
; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1]
@@ -152,10 +152,10 @@ define half @add_HalfH(<4 x half> %bin.rdx) {
;
; CHECK-GI-NOFP16-LABEL: add_HalfH:
; CHECK-GI-NOFP16: // %bb.0:
-; CHECK-GI-NOFP16-NEXT: mov w8, #32768 // =0x8000
+; CHECK-GI-NOFP16-NEXT: adrp x8, .LCPI3_0
; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NOFP16-NEXT: fcvt s2, h0
-; CHECK-GI-NOFP16-NEXT: fmov s1, w8
+; CHECK-GI-NOFP16-NEXT: ldr h1, [x8, :lo12:.LCPI3_0]
; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
; CHECK-GI-NOFP16-NEXT: fadd s1, s1, s2
; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1]
@@ -250,9 +250,9 @@ define half @add_H(<8 x half> %bin.rdx) {
;
; CHECK-GI-NOFP16-LABEL: add_H:
; CHECK-GI-NOFP16: // %bb.0:
-; CHECK-GI-NOFP16-NEXT: mov w8, #32768 // =0x8000
+; CHECK-GI-NOFP16-NEXT: adrp x8, .LCPI4_0
; CHECK-GI-NOFP16-NEXT: fcvt s2, h0
-; CHECK-GI-NOFP16-NEXT: fmov s1, w8
+; CHECK-GI-NOFP16-NEXT: ldr h1, [x8, :lo12:.LCPI4_0]
; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
; CHECK-GI-NOFP16-NEXT: fadd s1, s1, s2
; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1]
@@ -448,9 +448,9 @@ define half @add_2H(<16 x half> %bin.rdx) {
;
; CHECK-GI-NOFP16-LABEL: add_2H:
; CHECK-GI-NOFP16: // %bb.0:
-; CHECK-GI-NOFP16-NEXT: mov w8, #32768 // =0x8000
+; CHECK-GI-NOFP16-NEXT: adrp x8, .LCPI7_0
; CHECK-GI-NOFP16-NEXT: fcvt s3, h0
-; CHECK-GI-NOFP16-NEXT: fmov s2, w8
+; CHECK-GI-NOFP16-NEXT: ldr h2, [x8, :lo12:.LCPI7_0]
; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
; CHECK-GI-NOFP16-NEXT: fadd s2, s2, s3
; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[1]
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll b/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
index 18f463cfcf7c9..40925da0557ec 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
@@ -405,26 +405,23 @@ define half @fadd_reduction_v4f16_in_loop(ptr %ptr.start) {
;
; CHECK-GI-NOFP16-LABEL: fadd_reduction_v4f16_in_loop:
; CHECK-GI-NOFP16: // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT: movi d0, #0000000000000000
; CHECK-GI-NOFP16-NEXT: mov x8, xzr
-; CHECK-GI-NOFP16-NEXT: mov w9, #0 // =0x0
; CHECK-GI-NOFP16-NEXT: .LBB13_1: // %loop
; CHECK-GI-NOFP16-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-GI-NOFP16-NEXT: ldr d0, [x0, x8]
-; CHECK-GI-NOFP16-NEXT: fmov s1, w9
+; CHECK-GI-NOFP16-NEXT: ldr d1, [x0, x8]
+; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
; CHECK-GI-NOFP16-NEXT: add x8, x8, #8
; CHECK-GI-NOFP16-NEXT: cmp w8, #56
-; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-GI-NOFP16-NEXT: faddp v1.4s, v1.4s, v1.4s
+; CHECK-GI-NOFP16-NEXT: faddp s1, v1.2s
+; CHECK-GI-NOFP16-NEXT: fcvt h1, s1
; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
-; CHECK-GI-NOFP16-NEXT: faddp v0.4s, v0.4s, v0.4s
-; CHECK-GI-NOFP16-NEXT: faddp s0, v0.2s
-; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
-; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
-; CHECK-GI-NOFP16-NEXT: fadd s0, s0, s1
+; CHECK-GI-NOFP16-NEXT: fadd s0, s1, s0
; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
-; CHECK-GI-NOFP16-NEXT: fmov w9, s0
; CHECK-GI-NOFP16-NEXT: b.ne .LBB13_1
; CHECK-GI-NOFP16-NEXT: // %bb.2: // %exit
-; CHECK-GI-NOFP16-NEXT: // kill: def $h0 killed $h0 killed $s0
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: fadd_reduction_v4f16_in_loop:
@@ -521,28 +518,25 @@ define half @fadd_reduction_v8f16_in_loop(ptr %ptr.start) {
;
; CHECK-GI-NOFP16-LABEL: fadd_reduction_v8f16_in_loop:
; CHECK-GI-NOFP16: // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT: movi d0, #0000000000000000
; CHECK-GI-NOFP16-NEXT: mov x8, xzr
-; CHECK-GI-NOFP16-NEXT: mov w9, #0 // =0x0
; CHECK-GI-NOFP16-NEXT: .LBB14_1: // %loop
; CHECK-GI-NOFP16-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-GI-NOFP16-NEXT: ldr q0, [x0, x8]
+; CHECK-GI-NOFP16-NEXT: ldr q1, [x0, x8]
+; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
; CHECK-GI-NOFP16-NEXT: add x8, x8, #8
; CHECK-GI-NOFP16-NEXT: cmp w8, #56
-; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v0.4h
-; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
-; CHECK-GI-NOFP16-NEXT: fadd v0.4s, v1.4s, v0.4s
-; CHECK-GI-NOFP16-NEXT: fmov s1, w9
+; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v1.4h
+; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
+; CHECK-GI-NOFP16-NEXT: fadd v1.4s, v2.4s, v1.4s
+; CHECK-GI-NOFP16-NEXT: faddp v1.4s, v1.4s, v1.4s
+; CHECK-GI-NOFP16-NEXT: faddp s1, v1.2s
+; CHECK-GI-NOFP16-NEXT: fcvt h1, s1
; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
-; CHECK-GI-NOFP16-NEXT: faddp v0.4s, v0.4s, v0.4s
-; CHECK-GI-NOFP16-NEXT: faddp s0, v0.2s
-; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
-; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
-; CHECK-GI-NOFP16-NEXT: fadd s0, s0, s1
+; CHECK-GI-NOFP16-NEXT: fadd s0, s1, s0
; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
-; CHECK-GI-NOFP16-NEXT: fmov w9, s0
; CHECK-GI-NOFP16-NEXT: b.ne .LBB14_1
; CHECK-GI-NOFP16-NEXT: // %bb.2: // %exit
-; CHECK-GI-NOFP16-NEXT: // kill: def $h0 killed $h0 killed $s0
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: fadd_reduction_v8f16_in_loop:
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmul-strict.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmul-strict.ll
index e1b21705c95f3..c10d6e94226f2 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmul-strict.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmul-strict.ll
@@ -52,10 +52,10 @@ define half @mul_HalfH(<4 x half> %bin.rdx) {
;
; CHECK-GI-NOFP16-LABEL: mul_HalfH:
; CHECK-GI-NOFP16: // %bb.0:
-; CHECK-GI-NOFP16-NEXT: mov w8, #15360 // =0x3c00
+; CHECK-GI-NOFP16-NEXT: adrp x8, .LCPI1_0
; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NOFP16-NEXT: fcvt s2, h0
-; CHECK-GI-NOFP16-NEXT: fmov s1, w8
+; CHECK-GI-NOFP16-NEXT: ldr h1, [x8, :lo12:.LCPI1_0]
; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
; CHECK-GI-NOFP16-NEXT: fmul s1, s1, s2
; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1]
@@ -144,9 +144,9 @@ define half @mul_H(<8 x half> %bin.rdx) {
;
; CHECK-GI-NOFP16-LABEL: mul_H:
; CHECK-GI-NOFP16: // %bb.0:
-; CHECK-GI-NOFP16-NEXT: mov w8, #15360 // =0x3c00
+; CHECK-GI-NOFP16-NEXT: adrp x8, .LCPI2_0
; CHECK-GI-NOFP16-NEXT: fcvt s2, h0
-; CHECK-GI-NOFP16-NEXT: fmov s1, w8
+; CHECK-GI-NOFP16-NEXT: ldr h1, [x8, :lo12:.LCPI2_0]
; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
; CHECK-GI-NOFP16-NEXT: fmul s1, s1, s2
; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1]
@@ -321,9 +321,9 @@ define half @mul_2H(<16 x half> %bin.rdx) {
;
; CHECK-GI-NOFP16-LABEL: mul_2H:
; CHECK-GI-NOFP16: // %bb.0:
-; CHECK-GI-NOFP16-NEXT: mov w8, #15360 // =0x3c00
+; CHECK-GI-NOFP16-NEXT: adrp x8, .LCPI5_0
; CHECK-GI-NOFP16-NEXT: fcvt s3, h0
-; CHECK-GI-NOFP16-NEXT: fmov s2, w8
+; CHECK-GI-NOFP16-NEXT: ldr h2, [x8, :lo12:.LCPI5_0]
; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
; CHECK-GI-NOFP16-NEXT: fmul s2, s2, s3
; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[1]
>From 47a98ff15d214aab3c37f93ed4756147d6b466d6 Mon Sep 17 00:00:00 2001
From: Ryan Cowan <ryan.cowan at arm.com>
Date: Fri, 3 Oct 2025 12:01:33 +0000
Subject: [PATCH 3/4] Convert s16 G_FCONSTANT to G_CONSTANT when better
represented
---
.../GISel/AArch64PreLegalizerCombiner.cpp | 2 +-
llvm/test/CodeGen/AArch64/dup.ll | 30 +++++++------------
2 files changed, 11 insertions(+), 21 deletions(-)
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
index 8c10673c5e7b9..9a6c8ae5c89c6 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
@@ -50,7 +50,7 @@ bool matchFConstantToConstant(MachineInstr &MI, MachineRegisterInfo &MRI) {
assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT);
Register DstReg = MI.getOperand(0).getReg();
const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
- if (DstSize != 32 && DstSize != 64)
+ if (DstSize != 16 && DstSize != 32 && DstSize != 64)
return false;
// When we're storing a value, it doesn't matter what register bank it's on.
diff --git a/llvm/test/CodeGen/AArch64/dup.ll b/llvm/test/CodeGen/AArch64/dup.ll
index 1c4a6ab2217b0..079ff1076b110 100644
--- a/llvm/test/CodeGen/AArch64/dup.ll
+++ b/llvm/test/CodeGen/AArch64/dup.ll
@@ -1469,9 +1469,8 @@ define <2 x half> @loaddup_str_v2half(ptr %p) {
; CHECK-GI-LABEL: loaddup_str_v2half:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ldr h0, [x0]
-; CHECK-GI-NEXT: movi d1, #0000000000000000
+; CHECK-GI-NEXT: strh wzr, [x0]
; CHECK-GI-NEXT: dup v0.4h, v0.h[0]
-; CHECK-GI-NEXT: str h1, [x0]
; CHECK-GI-NEXT: ret
entry:
%a = load half, ptr %p
@@ -1527,9 +1526,8 @@ define <3 x half> @loaddup_str_v3half(ptr %p) {
; CHECK-GI-LABEL: loaddup_str_v3half:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ldr h0, [x0]
-; CHECK-GI-NEXT: movi d1, #0000000000000000
+; CHECK-GI-NEXT: strh wzr, [x0]
; CHECK-GI-NEXT: dup v0.4h, v0.h[0]
-; CHECK-GI-NEXT: str h1, [x0]
; CHECK-GI-NEXT: ret
entry:
%a = load half, ptr %p
@@ -1585,9 +1583,8 @@ define <4 x half> @loaddup_str_v4half(ptr %p) {
; CHECK-GI-LABEL: loaddup_str_v4half:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ldr h0, [x0]
-; CHECK-GI-NEXT: movi d1, #0000000000000000
+; CHECK-GI-NEXT: strh wzr, [x0]
; CHECK-GI-NEXT: dup v0.4h, v0.h[0]
-; CHECK-GI-NEXT: str h1, [x0]
; CHECK-GI-NEXT: ret
entry:
%a = load half, ptr %p
@@ -1642,9 +1639,8 @@ define <8 x half> @loaddup_str_v8half(ptr %p) {
; CHECK-GI-LABEL: loaddup_str_v8half:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ldr h0, [x0]
-; CHECK-GI-NEXT: movi d1, #0000000000000000
+; CHECK-GI-NEXT: strh wzr, [x0]
; CHECK-GI-NEXT: dup v0.8h, v0.h[0]
-; CHECK-GI-NEXT: str h1, [x0]
; CHECK-GI-NEXT: ret
entry:
%a = load half, ptr %p
@@ -1717,10 +1713,9 @@ define <16 x half> @loaddup_str_v16half(ptr %p) {
; CHECK-GI-LABEL: loaddup_str_v16half:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ldr h1, [x0]
-; CHECK-GI-NEXT: movi d2, #0000000000000000
+; CHECK-GI-NEXT: strh wzr, [x0]
; CHECK-GI-NEXT: dup v0.8h, v1.h[0]
; CHECK-GI-NEXT: dup v1.8h, v1.h[0]
-; CHECK-GI-NEXT: str h2, [x0]
; CHECK-GI-NEXT: ret
entry:
%a = load half, ptr %p
@@ -1776,9 +1771,8 @@ define <2 x bfloat> @loaddup_str_v2bfloat(ptr %p) {
; CHECK-GI-LABEL: loaddup_str_v2bfloat:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ldr h0, [x0]
-; CHECK-GI-NEXT: movi d1, #0000000000000000
+; CHECK-GI-NEXT: strh wzr, [x0]
; CHECK-GI-NEXT: dup v0.4h, v0.h[0]
-; CHECK-GI-NEXT: str h1, [x0]
; CHECK-GI-NEXT: ret
entry:
%a = load bfloat, ptr %p
@@ -1834,9 +1828,8 @@ define <3 x bfloat> @loaddup_str_v3bfloat(ptr %p) {
; CHECK-GI-LABEL: loaddup_str_v3bfloat:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ldr h0, [x0]
-; CHECK-GI-NEXT: movi d1, #0000000000000000
+; CHECK-GI-NEXT: strh wzr, [x0]
; CHECK-GI-NEXT: dup v0.4h, v0.h[0]
-; CHECK-GI-NEXT: str h1, [x0]
; CHECK-GI-NEXT: ret
entry:
%a = load bfloat, ptr %p
@@ -1892,9 +1885,8 @@ define <4 x bfloat> @loaddup_str_v4bfloat(ptr %p) {
; CHECK-GI-LABEL: loaddup_str_v4bfloat:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ldr h0, [x0]
-; CHECK-GI-NEXT: movi d1, #0000000000000000
+; CHECK-GI-NEXT: strh wzr, [x0]
; CHECK-GI-NEXT: dup v0.4h, v0.h[0]
-; CHECK-GI-NEXT: str h1, [x0]
; CHECK-GI-NEXT: ret
entry:
%a = load bfloat, ptr %p
@@ -1949,9 +1941,8 @@ define <8 x bfloat> @loaddup_str_v8bfloat(ptr %p) {
; CHECK-GI-LABEL: loaddup_str_v8bfloat:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ldr h0, [x0]
-; CHECK-GI-NEXT: movi d1, #0000000000000000
+; CHECK-GI-NEXT: strh wzr, [x0]
; CHECK-GI-NEXT: dup v0.8h, v0.h[0]
-; CHECK-GI-NEXT: str h1, [x0]
; CHECK-GI-NEXT: ret
entry:
%a = load bfloat, ptr %p
@@ -2024,10 +2015,9 @@ define <16 x bfloat> @loaddup_str_v16bfloat(ptr %p) {
; CHECK-GI-LABEL: loaddup_str_v16bfloat:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ldr h1, [x0]
-; CHECK-GI-NEXT: movi d2, #0000000000000000
+; CHECK-GI-NEXT: strh wzr, [x0]
; CHECK-GI-NEXT: dup v0.8h, v1.h[0]
; CHECK-GI-NEXT: dup v1.8h, v1.h[0]
-; CHECK-GI-NEXT: str h2, [x0]
; CHECK-GI-NEXT: ret
entry:
%a = load bfloat, ptr %p
>From 3058b44086af58530490aabb7496ee823850c92e Mon Sep 17 00:00:00 2001
From: Ryan Cowan <ryan.cowan at arm.com>
Date: Fri, 3 Oct 2025 15:47:59 +0000
Subject: [PATCH 4/4] Move matchFConstantToConstant to post legalizer lowering
---
llvm/lib/Target/AArch64/AArch64Combine.td | 3 +--
.../GISel/AArch64PostLegalizerLowering.cpp | 25 +++++++++++++++++++
.../GISel/AArch64PreLegalizerCombiner.cpp | 25 -------------------
.../AArch64/GlobalISel/combine-fconstant.mir | 21 +++++++++-------
4 files changed, 38 insertions(+), 36 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 076a6235eef0a..639ddcba28468 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -69,7 +69,6 @@ def push_mul_through_sext : push_opcode_through_ext<G_MUL, G_SEXT>;
def AArch64PreLegalizerCombiner: GICombiner<
"AArch64PreLegalizerCombinerImpl", [all_combines,
- fconstant_to_constant,
icmp_redundant_trunc,
fold_global_offset,
shuffle_to_extract,
@@ -341,7 +340,7 @@ def AArch64PostLegalizerLowering
: GICombiner<"AArch64PostLegalizerLoweringImpl",
[shuffle_vector_lowering, vashr_vlshr_imm,
icmp_lowering, build_vector_lowering,
- lower_vector_fcmp, form_truncstore,
+ lower_vector_fcmp, form_truncstore, fconstant_to_constant,
vector_sext_inreg_to_shift,
unmerge_ext_to_unmerge, lower_mulv2s64,
vector_unmerge_lowering, insertelt_nonconst,
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
index 6025f1c9f5f4e..e6dbc7a20088a 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
@@ -75,6 +75,31 @@ struct ShuffleVectorPseudo {
ShuffleVectorPseudo() = default;
};
+/// Return true if a G_FCONSTANT instruction is known to be better-represented
+/// as a G_CONSTANT.
+bool matchFConstantToConstant(MachineInstr &MI, MachineRegisterInfo &MRI) {
+ assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT);
+ Register DstReg = MI.getOperand(0).getReg();
+ const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
+ if (DstSize != 16 && DstSize != 32 && DstSize != 64)
+ return false;
+
+ // When we're storing a value, it doesn't matter what register bank it's on.
+ // Since not all floating point constants can be materialized using a fmov,
+ // it makes more sense to just use a GPR.
+ return all_of(MRI.use_nodbg_instructions(DstReg),
+ [](const MachineInstr &Use) { return Use.mayStore(); });
+}
+
+/// Change a G_FCONSTANT into a G_CONSTANT.
+void applyFConstantToConstant(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT);
+ MachineIRBuilder MIB(MI);
+ const APFloat &ImmValAPF = MI.getOperand(1).getFPImm()->getValueAPF();
+ MIB.buildConstant(MI.getOperand(0).getReg(), ImmValAPF.bitcastToAPInt());
+ MI.eraseFromParent();
+}
+
/// Check if a G_EXT instruction can handle a shuffle mask \p M when the vector
/// sources of the shuffle are different.
std::optional<std::pair<bool, uint64_t>> getExtMask(ArrayRef<int> M,
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
index 9a6c8ae5c89c6..896eab521bfdb 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
@@ -44,31 +44,6 @@ namespace {
#include "AArch64GenPreLegalizeGICombiner.inc"
#undef GET_GICOMBINER_TYPES
-/// Return true if a G_FCONSTANT instruction is known to be better-represented
-/// as a G_CONSTANT.
-bool matchFConstantToConstant(MachineInstr &MI, MachineRegisterInfo &MRI) {
- assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT);
- Register DstReg = MI.getOperand(0).getReg();
- const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
- if (DstSize != 16 && DstSize != 32 && DstSize != 64)
- return false;
-
- // When we're storing a value, it doesn't matter what register bank it's on.
- // Since not all floating point constants can be materialized using a fmov,
- // it makes more sense to just use a GPR.
- return all_of(MRI.use_nodbg_instructions(DstReg),
- [](const MachineInstr &Use) { return Use.mayStore(); });
-}
-
-/// Change a G_FCONSTANT into a G_CONSTANT.
-void applyFConstantToConstant(MachineInstr &MI) {
- assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT);
- MachineIRBuilder MIB(MI);
- const APFloat &ImmValAPF = MI.getOperand(1).getFPImm()->getValueAPF();
- MIB.buildConstant(MI.getOperand(0).getReg(), ImmValAPF.bitcastToAPInt());
- MI.eraseFromParent();
-}
-
/// Try to match a G_ICMP of a G_TRUNC with zero, in which the truncated bits
/// are sign bits. In this case, we can transform the G_ICMP to directly compare
/// the wide value with a zero.
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-fconstant.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-fconstant.mir
index 6362ed65d09e3..9381f0f41bbbc 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-fconstant.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-fconstant.mir
@@ -1,11 +1,12 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s
-# RUN: llc -debugify-and-strip-all-safe -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s
+# RUN: llc -run-pass=aarch64-postlegalizer-lowering -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s
+# RUN: llc -debugify-and-strip-all-safe -run-pass=aarch64-postlegalizer-lowering -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s
...
---
name: fconstant_to_constant_s32
alignment: 4
tracksRegLiveness: true
+legalized: true
frameInfo:
maxAlignment: 1
machineFunctionInfo: {}
@@ -24,16 +25,17 @@ body: |
; CHECK-NEXT: G_STORE [[C]](s32), [[PTR_ADD]](p0) :: (store (s32))
; CHECK-NEXT: RET_ReallyLR
%0:_(p0) = COPY $x0
- %3:_(s32) = G_FCONSTANT float 0x3FA99999A0000000
- %1:_(s64) = G_CONSTANT i64 524
- %2:_(p0) = G_PTR_ADD %0, %1(s64)
- G_STORE %3(s32), %2(p0) :: (store (s32))
+ %1:_(s32) = G_FCONSTANT float 0x3FA99999A0000000
+ %2:_(s64) = G_CONSTANT i64 524
+ %3:_(p0) = G_PTR_ADD %0, %2(s64)
+ G_STORE %1(s32), %3(p0) :: (store (s32))
RET_ReallyLR
...
---
name: fconstant_to_constant_s64
alignment: 4
tracksRegLiveness: true
+legalized: true
frameInfo:
maxAlignment: 1
machineFunctionInfo: {}
@@ -48,7 +50,7 @@ body: |
; CHECK-NEXT: G_STORE %c(s64), %ptr(p0) :: (store (s64))
; CHECK-NEXT: RET_ReallyLR
%ptr:_(p0) = COPY $x0
- %c:_(s64) = G_FCONSTANT double 0.0
+ %c:_(s64) = G_FCONSTANT double 0.000000e+00
G_STORE %c(s64), %ptr(p0) :: (store (s64))
RET_ReallyLR
...
@@ -56,6 +58,7 @@ body: |
name: no_store_means_no_combine
alignment: 4
tracksRegLiveness: true
+legalized: true
frameInfo:
maxAlignment: 1
machineFunctionInfo: {}
@@ -71,7 +74,7 @@ body: |
; CHECK-NEXT: %add:_(s64) = G_FADD %v, %c
; CHECK-NEXT: RET_ReallyLR implicit %add(s64)
%v:_(s64) = COPY $x0
- %c:_(s64) = G_FCONSTANT double 0.0
+ %c:_(s64) = G_FCONSTANT double 0.000000e+00
%add:_(s64) = G_FADD %v, %c
- RET_ReallyLR implicit %add
+ RET_ReallyLR implicit %add(s64)
...
More information about the llvm-commits
mailing list