[llvm] [AArch64][GlobalISel] Add G_FPEXT(G_FCONSTANT) folding (PR #160902)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 26 08:19:29 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Ryan Cowan (HolyMolyCowMan)
<details>
<summary>Changes</summary>
This change adds a new folding pattern, folding a G_FPEXT(G_FCONSTANT) to a G_FCONSTANT.
To make this work on AArch64, an s16 `G_FCONSTANT` must no longer be widened during legalization, because widening converts it into a `G_CONSTANT`. This should also fix some other floating-point combines that were missed when an fp16 `G_FCONSTANT` was widened.
---
Patch is 302.03 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/160902.diff
23 Files Affected:
- (modified) llvm/include/llvm/Target/GlobalISel/Combine.td (+2)
- (modified) llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp (+1)
- (modified) llvm/lib/Target/AArch64/AArch64Combine.td (+1-1)
- (modified) llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp (+2-2)
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir (+3-2)
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp16-fconstant.mir (+3-3)
- (modified) llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll (+2-5)
- (modified) llvm/test/CodeGen/AArch64/dup.ll (+20-10)
- (modified) llvm/test/CodeGen/AArch64/f16-instructions.ll (+10-11)
- (modified) llvm/test/CodeGen/AArch64/fcvt-fixed.ll (+176-385)
- (modified) llvm/test/CodeGen/AArch64/frem-power2.ll (+1-2)
- (modified) llvm/test/CodeGen/AArch64/vecreduce-fadd-strict.ll (+11-41)
- (modified) llvm/test/CodeGen/AArch64/vecreduce-fadd.ll (+18-24)
- (modified) llvm/test/CodeGen/AArch64/vecreduce-fmul-strict.ll (+6-24)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f16.ll (+1501-1716)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/frem.ll (+7-10)
- (modified) llvm/test/CodeGen/AMDGPU/fmed3.ll (+18-28)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.exp.ll (+6-9)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.exp10.ll (+6-9)
- (modified) llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll (+5-9)
- (modified) llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll (+32-44)
- (modified) llvm/test/CodeGen/AMDGPU/maximumnum.ll (+1-2)
- (modified) llvm/test/CodeGen/AMDGPU/minimumnum.ll (+1-2)
``````````diff
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 204e1f6887fa2..57828a270ec00 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -694,6 +694,7 @@ def constant_fold_fabs : constant_fold_unary_fp_op_rule<G_FABS>;
def constant_fold_fsqrt : constant_fold_unary_fp_op_rule<G_FSQRT>;
def constant_fold_flog2 : constant_fold_unary_fp_op_rule<G_FLOG2>;
def constant_fold_fptrunc : constant_fold_unary_fp_op_rule<G_FPTRUNC>;
+def constant_fold_fpext : constant_fold_unary_fp_op_rule<G_FPEXT>;
// Fold constant zero int to fp conversions.
class itof_const_zero_fold_rule<Instruction opcode> : GICombineRule <
@@ -712,6 +713,7 @@ def constant_fold_fp_ops : GICombineGroup<[
constant_fold_fsqrt,
constant_fold_flog2,
constant_fold_fptrunc,
+ constant_fold_fpext,
itof_const_zero_fold_si,
itof_const_zero_fold_ui
]>;
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 0ebee2cfd8688..2206a558f9f4c 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1728,6 +1728,7 @@ static APFloat constantFoldFpUnary(const MachineInstr &MI,
Result.clearSign();
return Result;
}
+ case TargetOpcode::G_FPEXT:
case TargetOpcode::G_FPTRUNC: {
bool Unused;
LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 076a6235eef0a..121ed198a5958 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -351,7 +351,7 @@ def AArch64PostLegalizerLowering
// Post-legalization combines which are primarily optimizations.
def AArch64PostLegalizerCombiner
: GICombiner<"AArch64PostLegalizerCombinerImpl",
- [copy_prop, cast_of_cast_combines,
+ [copy_prop, cast_of_cast_combines, constant_fold_fp_ops,
buildvector_of_truncate, integer_of_truncate,
mutate_anyext_to_zext, combines_for_extload,
combine_indexed_load_store, sext_trunc_sextload,
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index ea2196a584127..5613364626692 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -678,8 +678,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.widenScalarToNextPow2(0)
.clampScalar(0, s8, s64);
getActionDefinitionsBuilder(G_FCONSTANT)
- .legalFor({s32, s64, s128})
- .legalFor(HasFP16, {s16})
+ // Always legalize S16 to prevent G_FCONSTANT being widened to G_CONSTANT
+ .legalFor({s16, s32, s64, s128})
.clampScalar(0, MinFPScalar, s128);
// FIXME: fix moreElementsToNextPow2
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir
index c301e76852b54..c00ce2242a888 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir
@@ -48,8 +48,9 @@ body: |
; CHECK-NEXT: $w0 = COPY [[C]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 2.000000e+00
; CHECK-NEXT: $x0 = COPY [[C1]](s64)
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK-NEXT: $w0 = COPY [[C2]](s32)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C2]](s16)
+ ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32)
%0:_(s32) = G_FCONSTANT float 1.0
$w0 = COPY %0
%1:_(s64) = G_FCONSTANT double 2.0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp16-fconstant.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp16-fconstant.mir
index ddf219dc4927e..c6df3456a8445 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp16-fconstant.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp16-fconstant.mir
@@ -8,7 +8,7 @@ tracksRegLiveness: true
body: |
bb.0:
; NO-FP16-LABEL: name: fp16
- ; NO-FP16: %cst:_(s16) = G_CONSTANT i16 0
+ ; NO-FP16: %cst:_(s16) = G_FCONSTANT half 0xH0000
; NO-FP16-NEXT: $h0 = COPY %cst(s16)
; NO-FP16-NEXT: RET_ReallyLR implicit $h0
;
@@ -26,7 +26,7 @@ tracksRegLiveness: true
body: |
bb.0:
; NO-FP16-LABEL: name: fp16_non_zero
- ; NO-FP16: %cst:_(s16) = G_CONSTANT i16 16384
+ ; NO-FP16: %cst:_(s16) = G_FCONSTANT half 0xH4000
; NO-FP16-NEXT: $h0 = COPY %cst(s16)
; NO-FP16-NEXT: RET_ReallyLR implicit $h0
;
@@ -44,7 +44,7 @@ tracksRegLiveness: true
body: |
bb.1.entry:
; NO-FP16-LABEL: name: nan
- ; NO-FP16: %cst:_(s16) = G_CONSTANT i16 31745
+ ; NO-FP16: %cst:_(s16) = G_FCONSTANT half 0xH7C01
; NO-FP16-NEXT: %ext:_(s32) = G_FPEXT %cst(s16)
; NO-FP16-NEXT: $w0 = COPY %ext(s32)
; NO-FP16-NEXT: RET_ReallyLR implicit $w0
diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll
index cb5df07c7ede4..e8e563135acc5 100644
--- a/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll
@@ -739,15 +739,12 @@ define ptr @postidx32_shalf(ptr %src, ptr %out, half %a) {
;
; GISEL-LABEL: postidx32_shalf:
; GISEL: ; %bb.0:
-; GISEL-NEXT: mov w8, #0 ; =0x0
; GISEL-NEXT: ldr h1, [x0], #4
-; GISEL-NEXT: fmov s2, w8
; GISEL-NEXT: ; kill: def $h0 killed $h0 def $s0
; GISEL-NEXT: fmov w9, s0
-; GISEL-NEXT: fcvt s3, h1
+; GISEL-NEXT: fcvt s2, h1
; GISEL-NEXT: fmov w8, s1
-; GISEL-NEXT: fcvt s2, h2
-; GISEL-NEXT: fcmp s3, s2
+; GISEL-NEXT: fcmp s2, #0.0
; GISEL-NEXT: csel w8, w8, w9, mi
; GISEL-NEXT: strh w8, [x1]
; GISEL-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/dup.ll b/llvm/test/CodeGen/AArch64/dup.ll
index 079ff1076b110..1c4a6ab2217b0 100644
--- a/llvm/test/CodeGen/AArch64/dup.ll
+++ b/llvm/test/CodeGen/AArch64/dup.ll
@@ -1469,8 +1469,9 @@ define <2 x half> @loaddup_str_v2half(ptr %p) {
; CHECK-GI-LABEL: loaddup_str_v2half:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ldr h0, [x0]
-; CHECK-GI-NEXT: strh wzr, [x0]
+; CHECK-GI-NEXT: movi d1, #0000000000000000
; CHECK-GI-NEXT: dup v0.4h, v0.h[0]
+; CHECK-GI-NEXT: str h1, [x0]
; CHECK-GI-NEXT: ret
entry:
%a = load half, ptr %p
@@ -1526,8 +1527,9 @@ define <3 x half> @loaddup_str_v3half(ptr %p) {
; CHECK-GI-LABEL: loaddup_str_v3half:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ldr h0, [x0]
-; CHECK-GI-NEXT: strh wzr, [x0]
+; CHECK-GI-NEXT: movi d1, #0000000000000000
; CHECK-GI-NEXT: dup v0.4h, v0.h[0]
+; CHECK-GI-NEXT: str h1, [x0]
; CHECK-GI-NEXT: ret
entry:
%a = load half, ptr %p
@@ -1583,8 +1585,9 @@ define <4 x half> @loaddup_str_v4half(ptr %p) {
; CHECK-GI-LABEL: loaddup_str_v4half:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ldr h0, [x0]
-; CHECK-GI-NEXT: strh wzr, [x0]
+; CHECK-GI-NEXT: movi d1, #0000000000000000
; CHECK-GI-NEXT: dup v0.4h, v0.h[0]
+; CHECK-GI-NEXT: str h1, [x0]
; CHECK-GI-NEXT: ret
entry:
%a = load half, ptr %p
@@ -1639,8 +1642,9 @@ define <8 x half> @loaddup_str_v8half(ptr %p) {
; CHECK-GI-LABEL: loaddup_str_v8half:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ldr h0, [x0]
-; CHECK-GI-NEXT: strh wzr, [x0]
+; CHECK-GI-NEXT: movi d1, #0000000000000000
; CHECK-GI-NEXT: dup v0.8h, v0.h[0]
+; CHECK-GI-NEXT: str h1, [x0]
; CHECK-GI-NEXT: ret
entry:
%a = load half, ptr %p
@@ -1713,9 +1717,10 @@ define <16 x half> @loaddup_str_v16half(ptr %p) {
; CHECK-GI-LABEL: loaddup_str_v16half:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ldr h1, [x0]
-; CHECK-GI-NEXT: strh wzr, [x0]
+; CHECK-GI-NEXT: movi d2, #0000000000000000
; CHECK-GI-NEXT: dup v0.8h, v1.h[0]
; CHECK-GI-NEXT: dup v1.8h, v1.h[0]
+; CHECK-GI-NEXT: str h2, [x0]
; CHECK-GI-NEXT: ret
entry:
%a = load half, ptr %p
@@ -1771,8 +1776,9 @@ define <2 x bfloat> @loaddup_str_v2bfloat(ptr %p) {
; CHECK-GI-LABEL: loaddup_str_v2bfloat:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ldr h0, [x0]
-; CHECK-GI-NEXT: strh wzr, [x0]
+; CHECK-GI-NEXT: movi d1, #0000000000000000
; CHECK-GI-NEXT: dup v0.4h, v0.h[0]
+; CHECK-GI-NEXT: str h1, [x0]
; CHECK-GI-NEXT: ret
entry:
%a = load bfloat, ptr %p
@@ -1828,8 +1834,9 @@ define <3 x bfloat> @loaddup_str_v3bfloat(ptr %p) {
; CHECK-GI-LABEL: loaddup_str_v3bfloat:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ldr h0, [x0]
-; CHECK-GI-NEXT: strh wzr, [x0]
+; CHECK-GI-NEXT: movi d1, #0000000000000000
; CHECK-GI-NEXT: dup v0.4h, v0.h[0]
+; CHECK-GI-NEXT: str h1, [x0]
; CHECK-GI-NEXT: ret
entry:
%a = load bfloat, ptr %p
@@ -1885,8 +1892,9 @@ define <4 x bfloat> @loaddup_str_v4bfloat(ptr %p) {
; CHECK-GI-LABEL: loaddup_str_v4bfloat:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ldr h0, [x0]
-; CHECK-GI-NEXT: strh wzr, [x0]
+; CHECK-GI-NEXT: movi d1, #0000000000000000
; CHECK-GI-NEXT: dup v0.4h, v0.h[0]
+; CHECK-GI-NEXT: str h1, [x0]
; CHECK-GI-NEXT: ret
entry:
%a = load bfloat, ptr %p
@@ -1941,8 +1949,9 @@ define <8 x bfloat> @loaddup_str_v8bfloat(ptr %p) {
; CHECK-GI-LABEL: loaddup_str_v8bfloat:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ldr h0, [x0]
-; CHECK-GI-NEXT: strh wzr, [x0]
+; CHECK-GI-NEXT: movi d1, #0000000000000000
; CHECK-GI-NEXT: dup v0.8h, v0.h[0]
+; CHECK-GI-NEXT: str h1, [x0]
; CHECK-GI-NEXT: ret
entry:
%a = load bfloat, ptr %p
@@ -2015,9 +2024,10 @@ define <16 x bfloat> @loaddup_str_v16bfloat(ptr %p) {
; CHECK-GI-LABEL: loaddup_str_v16bfloat:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ldr h1, [x0]
-; CHECK-GI-NEXT: strh wzr, [x0]
+; CHECK-GI-NEXT: movi d2, #0000000000000000
; CHECK-GI-NEXT: dup v0.8h, v1.h[0]
; CHECK-GI-NEXT: dup v1.8h, v1.h[0]
+; CHECK-GI-NEXT: str h2, [x0]
; CHECK-GI-NEXT: ret
entry:
%a = load bfloat, ptr %p
diff --git a/llvm/test/CodeGen/AArch64/f16-instructions.ll b/llvm/test/CodeGen/AArch64/f16-instructions.ll
index adc536da26f26..085170c7ba381 100644
--- a/llvm/test/CodeGen/AArch64/f16-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/f16-instructions.ll
@@ -782,18 +782,17 @@ define void @test_fccmp(half %in, ptr %out) {
;
; CHECK-CVT-GI-LABEL: test_fccmp:
; CHECK-CVT-GI: // %bb.0:
-; CHECK-CVT-GI-NEXT: mov w8, #17664 // =0x4500
-; CHECK-CVT-GI-NEXT: mov w9, #18432 // =0x4800
; CHECK-CVT-GI-NEXT: // kill: def $h0 killed $h0 def $s0
-; CHECK-CVT-GI-NEXT: fcvt s2, h0
-; CHECK-CVT-GI-NEXT: fmov s1, w8
-; CHECK-CVT-GI-NEXT: fmov s3, w9
-; CHECK-CVT-GI-NEXT: fmov w9, s0
-; CHECK-CVT-GI-NEXT: fcvt s1, h1
-; CHECK-CVT-GI-NEXT: fcvt s3, h3
-; CHECK-CVT-GI-NEXT: fcmp s2, s1
-; CHECK-CVT-GI-NEXT: fccmp s2, s3, #4, mi
-; CHECK-CVT-GI-NEXT: csel w8, w9, w8, gt
+; CHECK-CVT-GI-NEXT: fcvt s1, h0
+; CHECK-CVT-GI-NEXT: fmov s2, #5.00000000
+; CHECK-CVT-GI-NEXT: adrp x8, .LCPI29_0
+; CHECK-CVT-GI-NEXT: fmov s3, #8.00000000
+; CHECK-CVT-GI-NEXT: fcmp s1, s2
+; CHECK-CVT-GI-NEXT: ldr h2, [x8, :lo12:.LCPI29_0]
+; CHECK-CVT-GI-NEXT: fmov w8, s0
+; CHECK-CVT-GI-NEXT: fmov w9, s2
+; CHECK-CVT-GI-NEXT: fccmp s1, s3, #4, mi
+; CHECK-CVT-GI-NEXT: csel w8, w8, w9, gt
; CHECK-CVT-GI-NEXT: strh w8, [x0]
; CHECK-CVT-GI-NEXT: ret
;
diff --git a/llvm/test/CodeGen/AArch64/fcvt-fixed.ll b/llvm/test/CodeGen/AArch64/fcvt-fixed.ll
index 51aad4fe25d3b..743d1604388de 100644
--- a/llvm/test/CodeGen/AArch64/fcvt-fixed.ll
+++ b/llvm/test/CodeGen/AArch64/fcvt-fixed.ll
@@ -149,33 +149,21 @@ define i64 @fcvtzs_f64_i64_64(double %dbl) {
}
define i32 @fcvtzs_f16_i32_7(half %flt) {
-; CHECK-SD-NO16-LABEL: fcvtzs_f16_i32_7:
-; CHECK-SD-NO16: // %bb.0:
-; CHECK-SD-NO16-NEXT: movi v1.2s, #67, lsl #24
-; CHECK-SD-NO16-NEXT: fcvt s0, h0
-; CHECK-SD-NO16-NEXT: fmul s0, s0, s1
-; CHECK-SD-NO16-NEXT: fcvt h0, s0
-; CHECK-SD-NO16-NEXT: fcvt s0, h0
-; CHECK-SD-NO16-NEXT: fcvtzs w0, s0
-; CHECK-SD-NO16-NEXT: ret
+; CHECK-NO16-LABEL: fcvtzs_f16_i32_7:
+; CHECK-NO16: // %bb.0:
+; CHECK-NO16-NEXT: movi v1.2s, #67, lsl #24
+; CHECK-NO16-NEXT: fcvt s0, h0
+; CHECK-NO16-NEXT: fmul s0, s0, s1
+; CHECK-NO16-NEXT: fcvt h0, s0
+; CHECK-NO16-NEXT: fcvt s0, h0
+; CHECK-NO16-NEXT: fcvtzs w0, s0
+; CHECK-NO16-NEXT: ret
;
; CHECK-SD-FP16-LABEL: fcvtzs_f16_i32_7:
; CHECK-SD-FP16: // %bb.0:
; CHECK-SD-FP16-NEXT: fcvtzs w0, h0, #7
; CHECK-SD-FP16-NEXT: ret
;
-; CHECK-GI-NO16-LABEL: fcvtzs_f16_i32_7:
-; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
-; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
-; CHECK-GI-NO16-NEXT: fcvt s1, h1
-; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
-; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fcvtzs w0, s0
-; CHECK-GI-NO16-NEXT: ret
-;
; CHECK-GI-FP16-LABEL: fcvtzs_f16_i32_7:
; CHECK-GI-FP16: // %bb.0:
; CHECK-GI-FP16-NEXT: adrp x8, .LCPI8_0
@@ -189,33 +177,21 @@ define i32 @fcvtzs_f16_i32_7(half %flt) {
}
define i32 @fcvtzs_f16_i32_15(half %flt) {
-; CHECK-SD-NO16-LABEL: fcvtzs_f16_i32_15:
-; CHECK-SD-NO16: // %bb.0:
-; CHECK-SD-NO16-NEXT: movi v1.2s, #71, lsl #24
-; CHECK-SD-NO16-NEXT: fcvt s0, h0
-; CHECK-SD-NO16-NEXT: fmul s0, s0, s1
-; CHECK-SD-NO16-NEXT: fcvt h0, s0
-; CHECK-SD-NO16-NEXT: fcvt s0, h0
-; CHECK-SD-NO16-NEXT: fcvtzs w0, s0
-; CHECK-SD-NO16-NEXT: ret
+; CHECK-NO16-LABEL: fcvtzs_f16_i32_15:
+; CHECK-NO16: // %bb.0:
+; CHECK-NO16-NEXT: movi v1.2s, #71, lsl #24
+; CHECK-NO16-NEXT: fcvt s0, h0
+; CHECK-NO16-NEXT: fmul s0, s0, s1
+; CHECK-NO16-NEXT: fcvt h0, s0
+; CHECK-NO16-NEXT: fcvt s0, h0
+; CHECK-NO16-NEXT: fcvtzs w0, s0
+; CHECK-NO16-NEXT: ret
;
; CHECK-SD-FP16-LABEL: fcvtzs_f16_i32_15:
; CHECK-SD-FP16: // %bb.0:
; CHECK-SD-FP16-NEXT: fcvtzs w0, h0, #15
; CHECK-SD-FP16-NEXT: ret
;
-; CHECK-GI-NO16-LABEL: fcvtzs_f16_i32_15:
-; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800
-; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
-; CHECK-GI-NO16-NEXT: fcvt s1, h1
-; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
-; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fcvtzs w0, s0
-; CHECK-GI-NO16-NEXT: ret
-;
; CHECK-GI-FP16-LABEL: fcvtzs_f16_i32_15:
; CHECK-GI-FP16: // %bb.0:
; CHECK-GI-FP16-NEXT: adrp x8, .LCPI9_0
@@ -229,33 +205,21 @@ define i32 @fcvtzs_f16_i32_15(half %flt) {
}
define i64 @fcvtzs_f16_i64_7(half %flt) {
-; CHECK-SD-NO16-LABEL: fcvtzs_f16_i64_7:
-; CHECK-SD-NO16: // %bb.0:
-; CHECK-SD-NO16-NEXT: movi v1.2s, #67, lsl #24
-; CHECK-SD-NO16-NEXT: fcvt s0, h0
-; CHECK-SD-NO16-NEXT: fmul s0, s0, s1
-; CHECK-SD-NO16-NEXT: fcvt h0, s0
-; CHECK-SD-NO16-NEXT: fcvt s0, h0
-; CHECK-SD-NO16-NEXT: fcvtzs x0, s0
-; CHECK-SD-NO16-NEXT: ret
+; CHECK-NO16-LABEL: fcvtzs_f16_i64_7:
+; CHECK-NO16: // %bb.0:
+; CHECK-NO16-NEXT: movi v1.2s, #67, lsl #24
+; CHECK-NO16-NEXT: fcvt s0, h0
+; CHECK-NO16-NEXT: fmul s0, s0, s1
+; CHECK-NO16-NEXT: fcvt h0, s0
+; CHECK-NO16-NEXT: fcvt s0, h0
+; CHECK-NO16-NEXT: fcvtzs x0, s0
+; CHECK-NO16-NEXT: ret
;
; CHECK-SD-FP16-LABEL: fcvtzs_f16_i64_7:
; CHECK-SD-FP16: // %bb.0:
; CHECK-SD-FP16-NEXT: fcvtzs x0, h0, #7
; CHECK-SD-FP16-NEXT: ret
;
-; CHECK-GI-NO16-LABEL: fcvtzs_f16_i64_7:
-; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
-; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
-; CHECK-GI-NO16-NEXT: fcvt s1, h1
-; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
-; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fcvtzs x0, s0
-; CHECK-GI-NO16-NEXT: ret
-;
; CHECK-GI-FP16-LABEL: fcvtzs_f16_i64_7:
; CHECK-GI-FP16: // %bb.0:
; CHECK-GI-FP16-NEXT: adrp x8, .LCPI10_0
@@ -269,33 +233,21 @@ define i64 @fcvtzs_f16_i64_7(half %flt) {
}
define i64 @fcvtzs_f16_i64_15(half %flt) {
-; CHECK-SD-NO16-LABEL: fcvtzs_f16_i64_15:
-; CHECK-SD-NO16: // %bb.0:
-; CHECK-SD-NO16-NEXT: movi v1.2s, #71, lsl #24
-; CHECK-SD-NO16-NEXT: fcvt s0, h0
-; CHECK-SD-NO16-NEXT: fmul s0, s0, s1
-; CHECK-SD-NO16-NEXT: fcvt h0, s0
-; CHECK-SD-NO16-NEXT: fcvt s0, h0
-; CHECK-SD-NO16-NEXT: fcvtzs x0, s0
-; CHECK-SD-NO16-NEXT: ret
+; CHECK-NO16-LABEL: fcvtzs_f16_i64_15:
+; CHECK-NO16: // %bb.0:
+; CHECK-NO16-NEXT: movi v1.2s, #71, lsl #24
+; CHECK-NO16-NEXT: fcvt s0, h0
+; CHECK-NO16-NEXT: fmul s0, s0, s1
+; CHECK-NO16-NEXT: fcvt h0, s0
+; CHECK-NO16-NEXT: fcvt s0, h0
+; CHECK-NO16-NEXT: fcvtzs x0, s0
+; CHECK-NO16-NEXT: ret
;
; CHECK-SD-FP16-LABEL: fcvtzs_f16_i64_15:
; CHECK-SD-FP16: // %bb.0:
; CHECK-SD-FP16-NEXT: fcvtzs x0, h0, #15
; CHECK-SD-FP16-NEXT: ret
;
-; CHECK-GI-NO16-LABEL: fcvtzs_f16_i64_15:
-; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800
-; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
-; CHECK-GI-NO16-NEXT: fcvt s1, h1
-; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
-; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fcvtzs x0, s0
-; CHECK-GI-NO16-NEXT: ret
-;
; CHECK-GI-FP16-LABEL: fcvtzs_f16_i64_15:
; CHECK-GI-FP16: // %bb.0:
; CHECK-GI-FP16-NEXT: adrp x8, .LCPI11_0
@@ -453,33 +405,21 @@ define i64 @fcvtzu_f64_i64_64(double %dbl) {
}
define i32 @fcvtzu_f16_i32_7(half %flt) {
-; CHECK-SD-NO16-LABEL: fcvtzu_f16_i32_7:
-; CHECK-SD-NO16: // %bb.0:
-; CHECK-SD-NO16-NEXT: movi v1.2s, #67, lsl #24
-; CHECK-SD-NO16-NEXT: fcvt s0, h0
-; CHECK-SD-NO16-NEXT: fmul s0, s0, s1
-; CHECK-SD-NO16-NEXT: fcvt h0, s0
-; CHECK-SD-NO16-NEXT: fcvt s0, h0
-; CHECK-SD-NO16-NEXT: fcvtzu w0, s0
-; CHECK-SD-NO16-NEXT: ret
+; CHECK-NO16-LABEL: fcvtzu_f16_i32_7:
+; CHECK-NO16: // %bb.0:
+; CHECK-NO16-NEXT: movi v1.2s, #67, lsl #24
+; CHECK-NO16-NEXT: fcvt s0, h0
+; CHECK-NO16-NEXT: fmul s0, s0, s1
+; CHECK-NO16-NEXT: fcvt h0, s0
+; CHECK-NO16-NEXT: fcvt s0, h0
+; CHECK-NO16-NEXT: fcvtzu w0, s0
+; CHECK-NO16-NEXT: ret
;
; CHECK-SD-FP16-LABEL: fcvtzu_f16_i32_7:
; CHECK-SD-FP16: // %bb.0:
; CHECK-SD-FP16-NEXT: fcvtzu w0, h0, #7
; CHECK-SD-FP16-NEXT: ret
;
-; CHECK-GI-NO16-LABEL: fcvtzu_f16_i32_7:
-; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
-; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
-; CHECK-GI-NO16-NEXT: fcvt s1, h1
-; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
-; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fcvtzu w0, s0
-; CHECK-GI-NO16-NEXT: ret
-;
; CHECK-GI-FP16-LABEL: fcvtzu_f16_i32_7:
; CHECK-GI-FP16: // %bb.0:
; CHECK-GI-FP16-NEXT: adrp x8, .LCPI20_0
@@ -493,33 +433,21 @@ define i32 @fcvtzu_f16_i32_7(half %flt) {
}
define i32 @fcvtzu_f16_i32_15(half %flt) {
-; CHECK-SD-NO16-LABEL: fcvtzu_f16_i32_15:
-; CHECK-SD-NO16: // %bb.0:
-; CHECK-SD-NO16-NEXT: movi v1.2s, #71, lsl #24
-; CHECK-SD-NO16-NEXT: fcvt s0, h0
-; CHECK-SD-NO16-NEXT: fmul s0, s0, s1
-; CHECK-SD-NO16-NEXT: fcvt h0, s0
-; CHECK-SD-NO16-NEXT: fcvt s0, h0
-; CHECK-SD-NO16-NEXT: fcvtzu w0, s0
-; CHECK-SD-NO16-NEXT: ret
+; CHECK-NO16-LABEL: fcvtzu_f16_i32_15:
+; CHECK-NO16: // %bb.0:
+; CHECK-NO16-NEXT: movi v1.2s, #71, lsl #24
+; CHECK-NO16-NEXT: fcvt s0, h0
+; CHECK-NO16-NEXT: fmul s0, s0, s1
+; CHECK-NO16-NEXT: fcvt h0, s0
+; CHECK-NO16-NEXT: fcvt s0, h0
+; CHECK-NO16-NEXT: fcvtzu w0, s0
+; CHECK-NO16-NEXT: ret
;
; CHECK-SD-FP16-LABEL: fcvtzu_f16_i32_15:
; CHECK-SD-FP16: // %bb.0:
; CHECK-SD-FP16-NEXT: fcvtzu w0, h0, #15
; CHECK-...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/160902
More information about the llvm-commits
mailing list