[llvm] [AArch64][GlobalISel] Add commute_constant_to_rhs to post legalizer combiners (PR #81103)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 8 00:36:56 PST 2024
https://github.com/davemgreen created https://github.com/llvm/llvm-project/pull/81103
This helps the FP reductions by moving constant operands to the RHS, which in turn helps simplify away fadd -0.0 and fmul 1.0.
>From aa8d56ce23a5b47a9c31daf1dbd240c1699ca489 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Thu, 8 Feb 2024 08:34:23 +0000
Subject: [PATCH] [AArch64][GlobalISel] Add commute_constant_to_rhs to post
legalizer combiners.
This helps the FP reductions by moving constant operands to the RHS, which in
turn helps simplify away fadd -0.0 and fmul 1.0.
---
llvm/lib/Target/AArch64/AArch64Combine.td | 3 +-
.../AArch64/GlobalISel/arm64-atomic.ll | 12 +-
.../CodeGen/AArch64/vecreduce-fadd-strict.ll | 214 ++++++------------
.../CodeGen/AArch64/vecreduce-fmul-strict.ll | 112 +++------
4 files changed, 105 insertions(+), 236 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 1daa7d5fe6a7a..fdea974d4540a 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -288,5 +288,6 @@ def AArch64PostLegalizerCombiner
constant_fold_binops, identity_combines,
ptr_add_immed_chain, overlapping_and,
split_store_zero_128, undef_combines,
- select_to_minmax, or_to_bsp]> {
+ select_to_minmax, or_to_bsp,
+ commute_constant_to_rhs]> {
}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
index 0e9c126e97a3d..458c2cb76d9e3 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
@@ -2146,8 +2146,7 @@ define i8 @atomicrmw_and_i8(ptr %ptr, i8 %rhs) {
; CHECK-OUTLINE-O1-NEXT: .cfi_offset w30, -8
; CHECK-OUTLINE-O1-NEXT: .cfi_offset w29, -16
; CHECK-OUTLINE-O1-NEXT: mov x2, x0
-; CHECK-OUTLINE-O1-NEXT: mov w8, #-1 ; =0xffffffff
-; CHECK-OUTLINE-O1-NEXT: eor w0, w8, w1
+; CHECK-OUTLINE-O1-NEXT: mvn w0, w1
; CHECK-OUTLINE-O1-NEXT: mov x1, x2
; CHECK-OUTLINE-O1-NEXT: bl ___aarch64_ldclr1_rel
; CHECK-OUTLINE-O1-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
@@ -3202,8 +3201,7 @@ define i16 @atomicrmw_and_i16(ptr %ptr, i16 %rhs) {
; CHECK-OUTLINE-O1-NEXT: .cfi_offset w30, -8
; CHECK-OUTLINE-O1-NEXT: .cfi_offset w29, -16
; CHECK-OUTLINE-O1-NEXT: mov x2, x0
-; CHECK-OUTLINE-O1-NEXT: mov w8, #-1 ; =0xffffffff
-; CHECK-OUTLINE-O1-NEXT: eor w0, w8, w1
+; CHECK-OUTLINE-O1-NEXT: mvn w0, w1
; CHECK-OUTLINE-O1-NEXT: mov x1, x2
; CHECK-OUTLINE-O1-NEXT: bl ___aarch64_ldclr2_rel
; CHECK-OUTLINE-O1-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
@@ -4255,8 +4253,7 @@ define i32 @atomicrmw_and_i32(ptr %ptr, i32 %rhs) {
; CHECK-OUTLINE-O1-NEXT: .cfi_offset w30, -8
; CHECK-OUTLINE-O1-NEXT: .cfi_offset w29, -16
; CHECK-OUTLINE-O1-NEXT: mov x2, x0
-; CHECK-OUTLINE-O1-NEXT: mov w8, #-1 ; =0xffffffff
-; CHECK-OUTLINE-O1-NEXT: eor w0, w8, w1
+; CHECK-OUTLINE-O1-NEXT: mvn w0, w1
; CHECK-OUTLINE-O1-NEXT: mov x1, x2
; CHECK-OUTLINE-O1-NEXT: bl ___aarch64_ldclr4_rel
; CHECK-OUTLINE-O1-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
@@ -5276,8 +5273,7 @@ define i64 @atomicrmw_and_i64(ptr %ptr, i64 %rhs) {
; CHECK-OUTLINE-O1-NEXT: .cfi_offset w30, -8
; CHECK-OUTLINE-O1-NEXT: .cfi_offset w29, -16
; CHECK-OUTLINE-O1-NEXT: mov x2, x0
-; CHECK-OUTLINE-O1-NEXT: mov x8, #-1 ; =0xffffffffffffffff
-; CHECK-OUTLINE-O1-NEXT: eor x0, x8, x1
+; CHECK-OUTLINE-O1-NEXT: mvn x0, x1
; CHECK-OUTLINE-O1-NEXT: mov x1, x2
; CHECK-OUTLINE-O1-NEXT: bl ___aarch64_ldclr8_rel
; CHECK-OUTLINE-O1-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fadd-strict.ll b/llvm/test/CodeGen/AArch64/vecreduce-fadd-strict.ll
index 20237705cc6e5..de95943de3111 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fadd-strict.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fadd-strict.ll
@@ -13,11 +13,7 @@ define float @add_HalfS(<2 x float> %bin.rdx) {
;
; CHECK-GI-LABEL: add_HalfS:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi v1.2s, #128, lsl #24
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: mov s2, v0.s[1]
-; CHECK-GI-NEXT: fadd s0, s1, s0
-; CHECK-GI-NEXT: fadd s0, s0, s2
+; CHECK-GI-NEXT: faddp s0, v0.2s
; CHECK-GI-NEXT: ret
%r = call float @llvm.vector.reduce.fadd.f32.v2f32(float -0.0, <2 x float> %bin.rdx)
ret float %r
@@ -82,15 +78,12 @@ define half @add_HalfH(<4 x half> %bin.rdx) {
;
; CHECK-GI-FP16-LABEL: add_HalfH:
; CHECK-GI-FP16: // %bb.0:
-; CHECK-GI-FP16-NEXT: adrp x8, .LCPI1_0
; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-FP16-NEXT: mov h2, v0.h[1]
-; CHECK-GI-FP16-NEXT: mov h3, v0.h[2]
-; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI1_0]
-; CHECK-GI-FP16-NEXT: fadd h1, h1, h0
+; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
+; CHECK-GI-FP16-NEXT: mov h2, v0.h[2]
+; CHECK-GI-FP16-NEXT: fadd h1, h0, h1
; CHECK-GI-FP16-NEXT: mov h0, v0.h[3]
; CHECK-GI-FP16-NEXT: fadd h1, h1, h2
-; CHECK-GI-FP16-NEXT: fadd h1, h1, h3
; CHECK-GI-FP16-NEXT: fadd h0, h1, h0
; CHECK-GI-FP16-NEXT: ret
%r = call half @llvm.vector.reduce.fadd.f16.v4f16(half -0.0, <4 x half> %bin.rdx)
@@ -202,22 +195,18 @@ define half @add_H(<8 x half> %bin.rdx) {
;
; CHECK-GI-FP16-LABEL: add_H:
; CHECK-GI-FP16: // %bb.0:
-; CHECK-GI-FP16-NEXT: adrp x8, .LCPI2_0
-; CHECK-GI-FP16-NEXT: mov h2, v0.h[1]
-; CHECK-GI-FP16-NEXT: mov h3, v0.h[2]
-; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI2_0]
-; CHECK-GI-FP16-NEXT: fadd h1, h1, h0
-; CHECK-GI-FP16-NEXT: fadd h1, h1, h2
-; CHECK-GI-FP16-NEXT: mov h2, v0.h[3]
+; CHECK-GI-FP16-NEXT: mov h1, v0.h[2]
+; CHECK-GI-FP16-NEXT: faddp h2, v0.2h
+; CHECK-GI-FP16-NEXT: mov h3, v0.h[3]
+; CHECK-GI-FP16-NEXT: fadd h1, h2, h1
+; CHECK-GI-FP16-NEXT: mov h2, v0.h[4]
; CHECK-GI-FP16-NEXT: fadd h1, h1, h3
-; CHECK-GI-FP16-NEXT: mov h3, v0.h[4]
+; CHECK-GI-FP16-NEXT: mov h3, v0.h[5]
; CHECK-GI-FP16-NEXT: fadd h1, h1, h2
-; CHECK-GI-FP16-NEXT: mov h2, v0.h[5]
-; CHECK-GI-FP16-NEXT: fadd h1, h1, h3
-; CHECK-GI-FP16-NEXT: mov h3, v0.h[6]
+; CHECK-GI-FP16-NEXT: mov h2, v0.h[6]
; CHECK-GI-FP16-NEXT: mov h0, v0.h[7]
-; CHECK-GI-FP16-NEXT: fadd h1, h1, h2
; CHECK-GI-FP16-NEXT: fadd h1, h1, h3
+; CHECK-GI-FP16-NEXT: fadd h1, h1, h2
; CHECK-GI-FP16-NEXT: fadd h0, h1, h0
; CHECK-GI-FP16-NEXT: ret
%r = call half @llvm.vector.reduce.fadd.f16.v8f16(half -0.0, <8 x half> %bin.rdx)
@@ -225,44 +214,23 @@ define half @add_H(<8 x half> %bin.rdx) {
}
define float @add_S(<4 x float> %bin.rdx) {
-; CHECK-SD-LABEL: add_S:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: mov s1, v0.s[2]
-; CHECK-SD-NEXT: faddp s2, v0.2s
-; CHECK-SD-NEXT: mov s0, v0.s[3]
-; CHECK-SD-NEXT: fadd s1, s2, s1
-; CHECK-SD-NEXT: fadd s0, s1, s0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: add_S:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi v1.2s, #128, lsl #24
-; CHECK-GI-NEXT: mov s2, v0.s[1]
-; CHECK-GI-NEXT: mov s3, v0.s[2]
-; CHECK-GI-NEXT: fadd s1, s1, s0
-; CHECK-GI-NEXT: mov s0, v0.s[3]
-; CHECK-GI-NEXT: fadd s1, s1, s2
-; CHECK-GI-NEXT: fadd s1, s1, s3
-; CHECK-GI-NEXT: fadd s0, s1, s0
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: add_S:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov s1, v0.s[2]
+; CHECK-NEXT: faddp s2, v0.2s
+; CHECK-NEXT: mov s0, v0.s[3]
+; CHECK-NEXT: fadd s1, s2, s1
+; CHECK-NEXT: fadd s0, s1, s0
+; CHECK-NEXT: ret
%r = call float @llvm.vector.reduce.fadd.f32.v4f32(float -0.0, <4 x float> %bin.rdx)
ret float %r
}
define double @add_D(<2 x double> %bin.rdx) {
-; CHECK-SD-LABEL: add_D:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: faddp d0, v0.2d
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: add_D:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000
-; CHECK-GI-NEXT: mov d2, v0.d[1]
-; CHECK-GI-NEXT: fmov d1, x8
-; CHECK-GI-NEXT: fadd d0, d1, d0
-; CHECK-GI-NEXT: fadd d0, d0, d2
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: add_D:
+; CHECK: // %bb.0:
+; CHECK-NEXT: faddp d0, v0.2d
+; CHECK-NEXT: ret
%r = call double @llvm.vector.reduce.fadd.f64.v2f64(double -0.0, <2 x double> %bin.rdx)
ret double %r
}
@@ -464,23 +432,19 @@ define half @add_2H(<16 x half> %bin.rdx) {
;
; CHECK-GI-FP16-LABEL: add_2H:
; CHECK-GI-FP16: // %bb.0:
-; CHECK-GI-FP16-NEXT: adrp x8, .LCPI5_0
-; CHECK-GI-FP16-NEXT: mov h3, v0.h[1]
-; CHECK-GI-FP16-NEXT: mov h4, v0.h[2]
-; CHECK-GI-FP16-NEXT: ldr h2, [x8, :lo12:.LCPI5_0]
-; CHECK-GI-FP16-NEXT: fadd h2, h2, h0
-; CHECK-GI-FP16-NEXT: fadd h2, h2, h3
-; CHECK-GI-FP16-NEXT: mov h3, v0.h[3]
+; CHECK-GI-FP16-NEXT: mov h2, v0.h[2]
+; CHECK-GI-FP16-NEXT: faddp h3, v0.2h
+; CHECK-GI-FP16-NEXT: mov h4, v0.h[3]
+; CHECK-GI-FP16-NEXT: fadd h2, h3, h2
+; CHECK-GI-FP16-NEXT: mov h3, v0.h[4]
; CHECK-GI-FP16-NEXT: fadd h2, h2, h4
-; CHECK-GI-FP16-NEXT: mov h4, v0.h[4]
+; CHECK-GI-FP16-NEXT: mov h4, v0.h[5]
; CHECK-GI-FP16-NEXT: fadd h2, h2, h3
-; CHECK-GI-FP16-NEXT: mov h3, v0.h[5]
-; CHECK-GI-FP16-NEXT: fadd h2, h2, h4
-; CHECK-GI-FP16-NEXT: mov h4, v0.h[6]
+; CHECK-GI-FP16-NEXT: mov h3, v0.h[6]
; CHECK-GI-FP16-NEXT: mov h0, v0.h[7]
+; CHECK-GI-FP16-NEXT: fadd h2, h2, h4
; CHECK-GI-FP16-NEXT: fadd h2, h2, h3
; CHECK-GI-FP16-NEXT: mov h3, v1.h[2]
-; CHECK-GI-FP16-NEXT: fadd h2, h2, h4
; CHECK-GI-FP16-NEXT: fadd h0, h2, h0
; CHECK-GI-FP16-NEXT: mov h2, v1.h[1]
; CHECK-GI-FP16-NEXT: fadd h0, h0, h1
@@ -502,95 +466,51 @@ define half @add_2H(<16 x half> %bin.rdx) {
}
define float @add_2S(<8 x float> %bin.rdx) {
-; CHECK-SD-LABEL: add_2S:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: mov s2, v0.s[2]
-; CHECK-SD-NEXT: faddp s3, v0.2s
-; CHECK-SD-NEXT: mov s0, v0.s[3]
-; CHECK-SD-NEXT: fadd s2, s3, s2
-; CHECK-SD-NEXT: mov s3, v1.s[2]
-; CHECK-SD-NEXT: fadd s0, s2, s0
-; CHECK-SD-NEXT: mov s2, v1.s[1]
-; CHECK-SD-NEXT: fadd s0, s0, s1
-; CHECK-SD-NEXT: mov s1, v1.s[3]
-; CHECK-SD-NEXT: fadd s0, s0, s2
-; CHECK-SD-NEXT: fadd s0, s0, s3
-; CHECK-SD-NEXT: fadd s0, s0, s1
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: add_2S:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi v2.2s, #128, lsl #24
-; CHECK-GI-NEXT: mov s3, v0.s[1]
-; CHECK-GI-NEXT: mov s4, v0.s[2]
-; CHECK-GI-NEXT: fadd s2, s2, s0
-; CHECK-GI-NEXT: mov s0, v0.s[3]
-; CHECK-GI-NEXT: fadd s2, s2, s3
-; CHECK-GI-NEXT: mov s3, v1.s[2]
-; CHECK-GI-NEXT: fadd s2, s2, s4
-; CHECK-GI-NEXT: fadd s0, s2, s0
-; CHECK-GI-NEXT: mov s2, v1.s[1]
-; CHECK-GI-NEXT: fadd s0, s0, s1
-; CHECK-GI-NEXT: mov s1, v1.s[3]
-; CHECK-GI-NEXT: fadd s0, s0, s2
-; CHECK-GI-NEXT: fadd s0, s0, s3
-; CHECK-GI-NEXT: fadd s0, s0, s1
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: add_2S:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov s2, v0.s[2]
+; CHECK-NEXT: faddp s3, v0.2s
+; CHECK-NEXT: mov s0, v0.s[3]
+; CHECK-NEXT: fadd s2, s3, s2
+; CHECK-NEXT: mov s3, v1.s[2]
+; CHECK-NEXT: fadd s0, s2, s0
+; CHECK-NEXT: mov s2, v1.s[1]
+; CHECK-NEXT: fadd s0, s0, s1
+; CHECK-NEXT: mov s1, v1.s[3]
+; CHECK-NEXT: fadd s0, s0, s2
+; CHECK-NEXT: fadd s0, s0, s3
+; CHECK-NEXT: fadd s0, s0, s1
+; CHECK-NEXT: ret
%r = call float @llvm.vector.reduce.fadd.f32.v8f32(float -0.0, <8 x float> %bin.rdx)
ret float %r
}
define double @add_2D(<4 x double> %bin.rdx) {
-; CHECK-SD-LABEL: add_2D:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: faddp d0, v0.2d
-; CHECK-SD-NEXT: mov d2, v1.d[1]
-; CHECK-SD-NEXT: fadd d0, d0, d1
-; CHECK-SD-NEXT: fadd d0, d0, d2
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: add_2D:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000
-; CHECK-GI-NEXT: mov d3, v0.d[1]
-; CHECK-GI-NEXT: fmov d2, x8
-; CHECK-GI-NEXT: fadd d0, d2, d0
-; CHECK-GI-NEXT: mov d2, v1.d[1]
-; CHECK-GI-NEXT: fadd d0, d0, d3
-; CHECK-GI-NEXT: fadd d0, d0, d1
-; CHECK-GI-NEXT: fadd d0, d0, d2
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: add_2D:
+; CHECK: // %bb.0:
+; CHECK-NEXT: faddp d0, v0.2d
+; CHECK-NEXT: mov d2, v1.d[1]
+; CHECK-NEXT: fadd d0, d0, d1
+; CHECK-NEXT: fadd d0, d0, d2
+; CHECK-NEXT: ret
%r = call double @llvm.vector.reduce.fadd.f64.v4f64(double -0.0, <4 x double> %bin.rdx)
ret double %r
}
; Added at least one test where the start value is not -0.0.
define float @add_S_init_42(<4 x float> %bin.rdx) {
-; CHECK-SD-LABEL: add_S_init_42:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: mov w8, #1109917696 // =0x42280000
-; CHECK-SD-NEXT: mov s2, v0.s[1]
-; CHECK-SD-NEXT: mov s3, v0.s[2]
-; CHECK-SD-NEXT: fmov s1, w8
-; CHECK-SD-NEXT: fadd s1, s0, s1
-; CHECK-SD-NEXT: mov s0, v0.s[3]
-; CHECK-SD-NEXT: fadd s1, s1, s2
-; CHECK-SD-NEXT: fadd s1, s1, s3
-; CHECK-SD-NEXT: fadd s0, s1, s0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: add_S_init_42:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: mov w8, #1109917696 // =0x42280000
-; CHECK-GI-NEXT: mov s2, v0.s[1]
-; CHECK-GI-NEXT: mov s3, v0.s[2]
-; CHECK-GI-NEXT: fmov s1, w8
-; CHECK-GI-NEXT: fadd s1, s1, s0
-; CHECK-GI-NEXT: mov s0, v0.s[3]
-; CHECK-GI-NEXT: fadd s1, s1, s2
-; CHECK-GI-NEXT: fadd s1, s1, s3
-; CHECK-GI-NEXT: fadd s0, s1, s0
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: add_S_init_42:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #1109917696 // =0x42280000
+; CHECK-NEXT: mov s2, v0.s[1]
+; CHECK-NEXT: mov s3, v0.s[2]
+; CHECK-NEXT: fmov s1, w8
+; CHECK-NEXT: fadd s1, s0, s1
+; CHECK-NEXT: mov s0, v0.s[3]
+; CHECK-NEXT: fadd s1, s1, s2
+; CHECK-NEXT: fadd s1, s1, s3
+; CHECK-NEXT: fadd s0, s1, s0
+; CHECK-NEXT: ret
%r = call float @llvm.vector.reduce.fadd.f32.v4f32(float 42.0, <4 x float> %bin.rdx)
ret float %r
}
@@ -604,5 +524,3 @@ declare float @llvm.vector.reduce.fadd.f32.v4f32(float, <4 x float>)
declare float @llvm.vector.reduce.fadd.f32.v8f32(float, <8 x float>)
declare double @llvm.vector.reduce.fadd.f64.v2f64(double, <2 x double>)
declare double @llvm.vector.reduce.fadd.f64.v4f64(double, <4 x double>)
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CHECK: {{.*}}
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmul-strict.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmul-strict.ll
index 32ce4d6eb9675..7b93e60e49b99 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmul-strict.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmul-strict.ll
@@ -13,11 +13,9 @@ define float @mul_HalfS(<2 x float> %bin.rdx) {
;
; CHECK-GI-LABEL: mul_HalfS:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: fmov s1, #1.00000000
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: mov s2, v0.s[1]
-; CHECK-GI-NEXT: fmul s0, s1, s0
-; CHECK-GI-NEXT: fmul s0, s0, s2
+; CHECK-GI-NEXT: mov s1, v0.s[1]
+; CHECK-GI-NEXT: fmul s0, s0, s1
; CHECK-GI-NEXT: ret
%r = call float @llvm.vector.reduce.fmul.f32.v2f32(float 1.0, <2 x float> %bin.rdx)
ret float %r
@@ -80,14 +78,12 @@ define half @mul_HalfH(<4 x half> %bin.rdx) {
;
; CHECK-GI-FP16-LABEL: mul_HalfH:
; CHECK-GI-FP16: // %bb.0:
-; CHECK-GI-FP16-NEXT: fmov h1, #1.00000000
; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-FP16-NEXT: mov h2, v0.h[1]
-; CHECK-GI-FP16-NEXT: mov h3, v0.h[2]
-; CHECK-GI-FP16-NEXT: fmul h1, h1, h0
+; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
+; CHECK-GI-FP16-NEXT: mov h2, v0.h[2]
+; CHECK-GI-FP16-NEXT: fmul h1, h0, h1
; CHECK-GI-FP16-NEXT: mov h0, v0.h[3]
; CHECK-GI-FP16-NEXT: fmul h1, h1, h2
-; CHECK-GI-FP16-NEXT: fmul h1, h1, h3
; CHECK-GI-FP16-NEXT: fmul h0, h1, h0
; CHECK-GI-FP16-NEXT: ret
%r = call half @llvm.vector.reduce.fmul.f16.v4f16(half 1.0, <4 x half> %bin.rdx)
@@ -193,9 +189,7 @@ define half @mul_H(<8 x half> %bin.rdx) {
;
; CHECK-GI-FP16-LABEL: mul_H:
; CHECK-GI-FP16: // %bb.0:
-; CHECK-GI-FP16-NEXT: fmov h1, #1.00000000
-; CHECK-GI-FP16-NEXT: fmul h1, h1, h0
-; CHECK-GI-FP16-NEXT: fmul h1, h1, v0.h[1]
+; CHECK-GI-FP16-NEXT: fmul h1, h0, v0.h[1]
; CHECK-GI-FP16-NEXT: fmul h1, h1, v0.h[2]
; CHECK-GI-FP16-NEXT: fmul h1, h1, v0.h[3]
; CHECK-GI-FP16-NEXT: fmul h1, h1, v0.h[4]
@@ -208,37 +202,21 @@ define half @mul_H(<8 x half> %bin.rdx) {
}
define float @mul_S(<4 x float> %bin.rdx) {
-; CHECK-SD-LABEL: mul_S:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: fmul s1, s0, v0.s[1]
-; CHECK-SD-NEXT: fmul s1, s1, v0.s[2]
-; CHECK-SD-NEXT: fmul s0, s1, v0.s[3]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: mul_S:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: fmov s1, #1.00000000
-; CHECK-GI-NEXT: fmul s1, s1, s0
-; CHECK-GI-NEXT: fmul s1, s1, v0.s[1]
-; CHECK-GI-NEXT: fmul s1, s1, v0.s[2]
-; CHECK-GI-NEXT: fmul s0, s1, v0.s[3]
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: mul_S:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmul s1, s0, v0.s[1]
+; CHECK-NEXT: fmul s1, s1, v0.s[2]
+; CHECK-NEXT: fmul s0, s1, v0.s[3]
+; CHECK-NEXT: ret
%r = call float @llvm.vector.reduce.fmul.f32.v4f32(float 1.0, <4 x float> %bin.rdx)
ret float %r
}
define double @mul_D(<2 x double> %bin.rdx) {
-; CHECK-SD-LABEL: mul_D:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: fmul d0, d0, v0.d[1]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: mul_D:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: fmov d1, #1.00000000
-; CHECK-GI-NEXT: fmul d1, d1, d0
-; CHECK-GI-NEXT: fmul d0, d1, v0.d[1]
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: mul_D:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmul d0, d0, v0.d[1]
+; CHECK-NEXT: ret
%r = call double @llvm.vector.reduce.fmul.f64.v2f64(double 1.0, <2 x double> %bin.rdx)
ret double %r
}
@@ -427,9 +405,7 @@ define half @mul_2H(<16 x half> %bin.rdx) {
;
; CHECK-GI-FP16-LABEL: mul_2H:
; CHECK-GI-FP16: // %bb.0:
-; CHECK-GI-FP16-NEXT: fmov h2, #1.00000000
-; CHECK-GI-FP16-NEXT: fmul h2, h2, h0
-; CHECK-GI-FP16-NEXT: fmul h2, h2, v0.h[1]
+; CHECK-GI-FP16-NEXT: fmul h2, h0, v0.h[1]
; CHECK-GI-FP16-NEXT: fmul h2, h2, v0.h[2]
; CHECK-GI-FP16-NEXT: fmul h2, h2, v0.h[3]
; CHECK-GI-FP16-NEXT: fmul h2, h2, v0.h[4]
@@ -450,49 +426,27 @@ define half @mul_2H(<16 x half> %bin.rdx) {
}
define float @mul_2S(<8 x float> %bin.rdx) {
-; CHECK-SD-LABEL: mul_2S:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: fmul s2, s0, v0.s[1]
-; CHECK-SD-NEXT: fmul s2, s2, v0.s[2]
-; CHECK-SD-NEXT: fmul s0, s2, v0.s[3]
-; CHECK-SD-NEXT: fmul s0, s0, s1
-; CHECK-SD-NEXT: fmul s0, s0, v1.s[1]
-; CHECK-SD-NEXT: fmul s0, s0, v1.s[2]
-; CHECK-SD-NEXT: fmul s0, s0, v1.s[3]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: mul_2S:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: fmov s2, #1.00000000
-; CHECK-GI-NEXT: fmul s2, s2, s0
-; CHECK-GI-NEXT: fmul s2, s2, v0.s[1]
-; CHECK-GI-NEXT: fmul s2, s2, v0.s[2]
-; CHECK-GI-NEXT: fmul s0, s2, v0.s[3]
-; CHECK-GI-NEXT: fmul s0, s0, s1
-; CHECK-GI-NEXT: fmul s0, s0, v1.s[1]
-; CHECK-GI-NEXT: fmul s0, s0, v1.s[2]
-; CHECK-GI-NEXT: fmul s0, s0, v1.s[3]
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: mul_2S:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmul s2, s0, v0.s[1]
+; CHECK-NEXT: fmul s2, s2, v0.s[2]
+; CHECK-NEXT: fmul s0, s2, v0.s[3]
+; CHECK-NEXT: fmul s0, s0, s1
+; CHECK-NEXT: fmul s0, s0, v1.s[1]
+; CHECK-NEXT: fmul s0, s0, v1.s[2]
+; CHECK-NEXT: fmul s0, s0, v1.s[3]
+; CHECK-NEXT: ret
%r = call float @llvm.vector.reduce.fmul.f32.v8f32(float 1.0, <8 x float> %bin.rdx)
ret float %r
}
define double @mul_2D(<4 x double> %bin.rdx) {
-; CHECK-SD-LABEL: mul_2D:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: fmul d0, d0, v0.d[1]
-; CHECK-SD-NEXT: fmul d0, d0, d1
-; CHECK-SD-NEXT: fmul d0, d0, v1.d[1]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: mul_2D:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: fmov d2, #1.00000000
-; CHECK-GI-NEXT: fmul d2, d2, d0
-; CHECK-GI-NEXT: fmul d0, d2, v0.d[1]
-; CHECK-GI-NEXT: fmul d0, d0, d1
-; CHECK-GI-NEXT: fmul d0, d0, v1.d[1]
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: mul_2D:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmul d0, d0, v0.d[1]
+; CHECK-NEXT: fmul d0, d0, d1
+; CHECK-NEXT: fmul d0, d0, v1.d[1]
+; CHECK-NEXT: ret
%r = call double @llvm.vector.reduce.fmul.f64.v4f64(double 1.0, <4 x double> %bin.rdx)
ret double %r
}
More information about the llvm-commits
mailing list