[llvm] 03a4702 - [RISCV] Fix the neutral element in vector 'fadd' reductions
Fraser Cormack via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 14 02:27:47 PDT 2021
Author: Fraser Cormack
Date: 2021-07-14T10:18:38+01:00
New Revision: 03a4702c884a0498db902aff34ebb19c48b4696b
URL: https://github.com/llvm/llvm-project/commit/03a4702c884a0498db902aff34ebb19c48b4696b
DIFF: https://github.com/llvm/llvm-project/commit/03a4702c884a0498db902aff34ebb19c48b4696b.diff
LOG: [RISCV] Fix the neutral element in vector 'fadd' reductions
Using positive zero as the neutral element in 'fadd' reductions, while
it generates better code, is incorrect. The correct neutral element is
negative zero: x + -0.0 = x for every x (0.0 + -0.0 = 0.0 and
-0.0 + -0.0 = -0.0), whereas seeding the reduction with positive zero
loses the sign of a negative-zero result, since -0.0 + 0.0 = 0.0.
There may be more optimal lowerings of negative zero that avoid the
constant-pool loads introduced here; these are left as future work.
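
As an aside, here is a minimal standalone C++ sketch (not part of this
commit, and assuming default IEEE-754 semantics, i.e. no fast-math)
illustrating why the start value matters: seeding with -0.0 preserves a
negative-zero result, while seeding with +0.0 flips its sign. This is
the behaviour the patch's switch to DAG.getNeutralElement relies on.

  #include <cstdio>

  int main() {
    // -0.0 is the additive identity: x + -0.0 == x for every x,
    // including x == -0.0 itself.
    std::printf("-0.0 + -0.0 = %g\n", -0.0 + -0.0); // prints -0
    // +0.0 is not an identity: it loses the sign of a negative-zero
    // partial result.
    std::printf("-0.0 +  0.0 = %g\n", -0.0 + 0.0);  // prints 0
    return 0;
  }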
Reviewed By: craig.topper
Differential Revision: https://reviews.llvm.org/D105902
Added:
Modified:
    llvm/lib/Target/RISCV/RISCVISelLowering.cpp
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
    llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 9d2ae6788a929..e6527a9967dbd 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -3718,7 +3718,7 @@ getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT) {
llvm_unreachable("Unhandled reduction");
case ISD::VECREDUCE_FADD:
return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0),
- DAG.getConstantFP(0.0, DL, EltVT));
+ DAG.getNeutralElement(BaseOpcode, DL, EltVT, Flags));
case ISD::VECREDUCE_SEQ_FADD:
return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
Op.getOperand(0));
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
index 76dabfc23bb15..b4219cd763109 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+experimental-zfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+experimental-zfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+experimental-zfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+experimental-zfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
declare half @llvm.vector.reduce.fadd.v1f16(half, <1 x half>)
@@ -38,10 +38,12 @@ declare half @llvm.vector.reduce.fadd.v2f16(half, <2 x half>)
define half @vreduce_fadd_v2f16(<2 x half>* %x, half %s) {
; CHECK-LABEL: vreduce_fadd_v2f16:
; CHECK: # %bb.0:
+; CHECK-NEXT: lui a1, %hi(.LCPI2_0)
+; CHECK-NEXT: flh ft0, %lo(.LCPI2_0)(a1)
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
; CHECK-NEXT: vle16.v v25, (a0)
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu
-; CHECK-NEXT: vmv.v.i v26, 0
+; CHECK-NEXT: vfmv.v.f v26, ft0
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
; CHECK-NEXT: vfredsum.vs v25, v25, v26
; CHECK-NEXT: vfmv.f.s ft0, v25
@@ -73,10 +75,12 @@ declare half @llvm.vector.reduce.fadd.v4f16(half, <4 x half>)
define half @vreduce_fadd_v4f16(<4 x half>* %x, half %s) {
; CHECK-LABEL: vreduce_fadd_v4f16:
; CHECK: # %bb.0:
+; CHECK-NEXT: lui a1, %hi(.LCPI4_0)
+; CHECK-NEXT: flh ft0, %lo(.LCPI4_0)(a1)
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
; CHECK-NEXT: vle16.v v25, (a0)
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu
-; CHECK-NEXT: vmv.v.i v26, 0
+; CHECK-NEXT: vfmv.v.f v26, ft0
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
; CHECK-NEXT: vfredsum.vs v25, v25, v26
; CHECK-NEXT: vfmv.f.s ft0, v25
@@ -108,10 +112,12 @@ declare half @llvm.vector.reduce.fadd.v8f16(half, <8 x half>)
define half @vreduce_fadd_v8f16(<8 x half>* %x, half %s) {
; CHECK-LABEL: vreduce_fadd_v8f16:
; CHECK: # %bb.0:
+; CHECK-NEXT: lui a1, %hi(.LCPI6_0)
+; CHECK-NEXT: flh ft0, %lo(.LCPI6_0)(a1)
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; CHECK-NEXT: vle16.v v25, (a0)
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu
-; CHECK-NEXT: vmv.v.i v26, 0
+; CHECK-NEXT: vfmv.v.f v26, ft0
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; CHECK-NEXT: vfredsum.vs v25, v25, v26
; CHECK-NEXT: vfmv.f.s ft0, v25
@@ -143,10 +149,12 @@ declare half @llvm.vector.reduce.fadd.v16f16(half, <16 x half>)
define half @vreduce_fadd_v16f16(<16 x half>* %x, half %s) {
; CHECK-LABEL: vreduce_fadd_v16f16:
; CHECK: # %bb.0:
+; CHECK-NEXT: lui a1, %hi(.LCPI8_0)
+; CHECK-NEXT: flh ft0, %lo(.LCPI8_0)(a1)
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu
; CHECK-NEXT: vle16.v v26, (a0)
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu
-; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu
; CHECK-NEXT: vfredsum.vs v25, v26, v25
; CHECK-NEXT: vfmv.f.s ft0, v25
@@ -176,18 +184,35 @@ define half @vreduce_ord_fadd_v16f16(<16 x half>* %x, half %s) {
declare half @llvm.vector.reduce.fadd.v32f16(half, <32 x half>)
define half @vreduce_fadd_v32f16(<32 x half>* %x, half %s) {
-; CHECK-LABEL: vreduce_fadd_v32f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT: vle16.v v28, (a0)
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu
-; CHECK-NEXT: vmv.v.i v25, 0
-; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT: vfredsum.vs v25, v28, v25
-; CHECK-NEXT: vfmv.f.s ft0, v25
-; CHECK-NEXT: fadd.h fa0, fa0, ft0
-; CHECK-NEXT: ret
+; RV32-LABEL: vreduce_fadd_v32f16:
+; RV32: # %bb.0:
+; RV32-NEXT: addi a1, zero, 32
+; RV32-NEXT: lui a2, %hi(.LCPI10_0)
+; RV32-NEXT: flh ft0, %lo(.LCPI10_0)(a2)
+; RV32-NEXT: vsetvli zero, a1, e16, m4, ta, mu
+; RV32-NEXT: vle16.v v28, (a0)
+; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; RV32-NEXT: vfmv.v.f v25, ft0
+; RV32-NEXT: vsetvli zero, a1, e16, m4, ta, mu
+; RV32-NEXT: vfredsum.vs v25, v28, v25
+; RV32-NEXT: vfmv.f.s ft0, v25
+; RV32-NEXT: fadd.h fa0, fa0, ft0
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_fadd_v32f16:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a1, %hi(.LCPI10_0)
+; RV64-NEXT: flh ft0, %lo(.LCPI10_0)(a1)
+; RV64-NEXT: addi a1, zero, 32
+; RV64-NEXT: vsetvli zero, a1, e16, m4, ta, mu
+; RV64-NEXT: vle16.v v28, (a0)
+; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; RV64-NEXT: vfmv.v.f v25, ft0
+; RV64-NEXT: vsetvli zero, a1, e16, m4, ta, mu
+; RV64-NEXT: vfredsum.vs v25, v28, v25
+; RV64-NEXT: vfmv.f.s ft0, v25
+; RV64-NEXT: fadd.h fa0, fa0, ft0
+; RV64-NEXT: ret
%v = load <32 x half>, <32 x half>* %x
%red = call reassoc half @llvm.vector.reduce.fadd.v32f16(half %s, <32 x half> %v)
ret half %red
@@ -213,18 +238,35 @@ define half @vreduce_ord_fadd_v32f16(<32 x half>* %x, half %s) {
declare half @llvm.vector.reduce.fadd.v64f16(half, <64 x half>)
define half @vreduce_fadd_v64f16(<64 x half>* %x, half %s) {
-; CHECK-LABEL: vreduce_fadd_v64f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi a1, zero, 64
-; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu
-; CHECK-NEXT: vmv.v.i v25, 0
-; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT: vfredsum.vs v25, v8, v25
-; CHECK-NEXT: vfmv.f.s ft0, v25
-; CHECK-NEXT: fadd.h fa0, fa0, ft0
-; CHECK-NEXT: ret
+; RV32-LABEL: vreduce_fadd_v64f16:
+; RV32: # %bb.0:
+; RV32-NEXT: addi a1, zero, 64
+; RV32-NEXT: lui a2, %hi(.LCPI12_0)
+; RV32-NEXT: flh ft0, %lo(.LCPI12_0)(a2)
+; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, mu
+; RV32-NEXT: vle16.v v8, (a0)
+; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; RV32-NEXT: vfmv.v.f v25, ft0
+; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, mu
+; RV32-NEXT: vfredsum.vs v25, v8, v25
+; RV32-NEXT: vfmv.f.s ft0, v25
+; RV32-NEXT: fadd.h fa0, fa0, ft0
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_fadd_v64f16:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a1, %hi(.LCPI12_0)
+; RV64-NEXT: flh ft0, %lo(.LCPI12_0)(a1)
+; RV64-NEXT: addi a1, zero, 64
+; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, mu
+; RV64-NEXT: vle16.v v8, (a0)
+; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; RV64-NEXT: vfmv.v.f v25, ft0
+; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, mu
+; RV64-NEXT: vfredsum.vs v25, v8, v25
+; RV64-NEXT: vfmv.f.s ft0, v25
+; RV64-NEXT: fadd.h fa0, fa0, ft0
+; RV64-NEXT: ret
%v = load <64 x half>, <64 x half>* %x
%red = call reassoc half @llvm.vector.reduce.fadd.v64f16(half %s, <64 x half> %v)
ret half %red
@@ -257,9 +299,11 @@ define half @vreduce_fadd_v128f16(<128 x half>* %x, half %s) {
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle16.v v16, (a0)
+; CHECK-NEXT: lui a0, %hi(.LCPI14_0)
+; CHECK-NEXT: flh ft0, %lo(.LCPI14_0)(a0)
; CHECK-NEXT: vfadd.vv v8, v8, v16
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu
-; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu
; CHECK-NEXT: vfredsum.vs v25, v8, v25
; CHECK-NEXT: vfmv.f.s ft0, v25
@@ -330,10 +374,12 @@ declare float @llvm.vector.reduce.fadd.v2f32(float, <2 x float>)
define float @vreduce_fadd_v2f32(<2 x float>* %x, float %s) {
; CHECK-LABEL: vreduce_fadd_v2f32:
; CHECK: # %bb.0:
+; CHECK-NEXT: lui a1, %hi(.LCPI18_0)
+; CHECK-NEXT: flw ft0, %lo(.LCPI18_0)(a1)
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
; CHECK-NEXT: vle32.v v25, (a0)
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu
-; CHECK-NEXT: vmv.v.i v26, 0
+; CHECK-NEXT: vfmv.v.f v26, ft0
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
; CHECK-NEXT: vfredsum.vs v25, v25, v26
; CHECK-NEXT: vfmv.f.s ft0, v25
@@ -365,10 +411,12 @@ declare float @llvm.vector.reduce.fadd.v4f32(float, <4 x float>)
define float @vreduce_fadd_v4f32(<4 x float>* %x, float %s) {
; CHECK-LABEL: vreduce_fadd_v4f32:
; CHECK: # %bb.0:
+; CHECK-NEXT: lui a1, %hi(.LCPI20_0)
+; CHECK-NEXT: flw ft0, %lo(.LCPI20_0)(a1)
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT: vle32.v v25, (a0)
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu
-; CHECK-NEXT: vmv.v.i v26, 0
+; CHECK-NEXT: vfmv.v.f v26, ft0
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT: vfredsum.vs v25, v25, v26
; CHECK-NEXT: vfmv.f.s ft0, v25
@@ -400,10 +448,12 @@ declare float @llvm.vector.reduce.fadd.v8f32(float, <8 x float>)
define float @vreduce_fadd_v8f32(<8 x float>* %x, float %s) {
; CHECK-LABEL: vreduce_fadd_v8f32:
; CHECK: # %bb.0:
+; CHECK-NEXT: lui a1, %hi(.LCPI22_0)
+; CHECK-NEXT: flw ft0, %lo(.LCPI22_0)(a1)
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; CHECK-NEXT: vle32.v v26, (a0)
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu
-; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; CHECK-NEXT: vfredsum.vs v25, v26, v25
; CHECK-NEXT: vfmv.f.s ft0, v25
@@ -435,10 +485,12 @@ declare float @llvm.vector.reduce.fadd.v16f32(float, <16 x float>)
define float @vreduce_fadd_v16f32(<16 x float>* %x, float %s) {
; CHECK-LABEL: vreduce_fadd_v16f32:
; CHECK: # %bb.0:
+; CHECK-NEXT: lui a1, %hi(.LCPI24_0)
+; CHECK-NEXT: flw ft0, %lo(.LCPI24_0)(a1)
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu
; CHECK-NEXT: vle32.v v28, (a0)
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu
-; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu
; CHECK-NEXT: vfredsum.vs v25, v28, v25
; CHECK-NEXT: vfmv.f.s ft0, v25
@@ -468,18 +520,35 @@ define float @vreduce_ord_fadd_v16f32(<16 x float>* %x, float %s) {
declare float @llvm.vector.reduce.fadd.v32f32(float, <32 x float>)
define float @vreduce_fadd_v32f32(<32 x float>* %x, float %s) {
-; CHECK-LABEL: vreduce_fadd_v32f32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu
-; CHECK-NEXT: vmv.v.i v25, 0
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT: vfredsum.vs v25, v8, v25
-; CHECK-NEXT: vfmv.f.s ft0, v25
-; CHECK-NEXT: fadd.s fa0, fa0, ft0
-; CHECK-NEXT: ret
+; RV32-LABEL: vreduce_fadd_v32f32:
+; RV32: # %bb.0:
+; RV32-NEXT: addi a1, zero, 32
+; RV32-NEXT: lui a2, %hi(.LCPI26_0)
+; RV32-NEXT: flw ft0, %lo(.LCPI26_0)(a2)
+; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, mu
+; RV32-NEXT: vle32.v v8, (a0)
+; RV32-NEXT: vsetvli a0, zero, e32, m1, ta, mu
+; RV32-NEXT: vfmv.v.f v25, ft0
+; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, mu
+; RV32-NEXT: vfredsum.vs v25, v8, v25
+; RV32-NEXT: vfmv.f.s ft0, v25
+; RV32-NEXT: fadd.s fa0, fa0, ft0
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vreduce_fadd_v32f32:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a1, %hi(.LCPI26_0)
+; RV64-NEXT: flw ft0, %lo(.LCPI26_0)(a1)
+; RV64-NEXT: addi a1, zero, 32
+; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, mu
+; RV64-NEXT: vle32.v v8, (a0)
+; RV64-NEXT: vsetvli a0, zero, e32, m1, ta, mu
+; RV64-NEXT: vfmv.v.f v25, ft0
+; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, mu
+; RV64-NEXT: vfredsum.vs v25, v8, v25
+; RV64-NEXT: vfmv.f.s ft0, v25
+; RV64-NEXT: fadd.s fa0, fa0, ft0
+; RV64-NEXT: ret
%v = load <32 x float>, <32 x float>* %x
%red = call reassoc float @llvm.vector.reduce.fadd.v32f32(float %s, <32 x float> %v)
ret float %red
@@ -512,9 +581,11 @@ define float @vreduce_fadd_v64f32(<64 x float>* %x, float %s) {
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle32.v v16, (a0)
+; CHECK-NEXT: lui a0, %hi(.LCPI28_0)
+; CHECK-NEXT: flw ft0, %lo(.LCPI28_0)(a0)
; CHECK-NEXT: vfadd.vv v8, v8, v16
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu
-; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu
; CHECK-NEXT: vfredsum.vs v25, v8, v25
; CHECK-NEXT: vfmv.f.s ft0, v25
@@ -585,10 +656,12 @@ declare double @llvm.vector.reduce.fadd.v2f64(double, <2 x double>)
define double @vreduce_fadd_v2f64(<2 x double>* %x, double %s) {
; CHECK-LABEL: vreduce_fadd_v2f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: lui a1, %hi(.LCPI32_0)
+; CHECK-NEXT: fld ft0, %lo(.LCPI32_0)(a1)
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu
; CHECK-NEXT: vle64.v v25, (a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu
-; CHECK-NEXT: vmv.v.i v26, 0
+; CHECK-NEXT: vfmv.v.f v26, ft0
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu
; CHECK-NEXT: vfredsum.vs v25, v25, v26
; CHECK-NEXT: vfmv.f.s ft0, v25
@@ -620,10 +693,12 @@ declare double @llvm.vector.reduce.fadd.v4f64(double, <4 x double>)
define double @vreduce_fadd_v4f64(<4 x double>* %x, double %s) {
; CHECK-LABEL: vreduce_fadd_v4f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: lui a1, %hi(.LCPI34_0)
+; CHECK-NEXT: fld ft0, %lo(.LCPI34_0)(a1)
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu
; CHECK-NEXT: vle64.v v26, (a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu
-; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu
; CHECK-NEXT: vfredsum.vs v25, v26, v25
; CHECK-NEXT: vfmv.f.s ft0, v25
@@ -655,10 +730,12 @@ declare double @llvm.vector.reduce.fadd.v8f64(double, <8 x double>)
define double @vreduce_fadd_v8f64(<8 x double>* %x, double %s) {
; CHECK-LABEL: vreduce_fadd_v8f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: lui a1, %hi(.LCPI36_0)
+; CHECK-NEXT: fld ft0, %lo(.LCPI36_0)(a1)
; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; CHECK-NEXT: vle64.v v28, (a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu
-; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; CHECK-NEXT: vfredsum.vs v25, v28, v25
; CHECK-NEXT: vfmv.f.s ft0, v25
@@ -690,10 +767,12 @@ declare double @llvm.vector.reduce.fadd.v16f64(double, <16 x double>)
define double @vreduce_fadd_v16f64(<16 x double>* %x, double %s) {
; CHECK-LABEL: vreduce_fadd_v16f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: lui a1, %hi(.LCPI38_0)
+; CHECK-NEXT: fld ft0, %lo(.LCPI38_0)(a1)
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, mu
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu
-; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, mu
; CHECK-NEXT: vfredsum.vs v25, v8, v25
; CHECK-NEXT: vfmv.f.s ft0, v25
@@ -729,9 +808,11 @@ define double @vreduce_fadd_v32f64(<32 x double>* %x, double %s) {
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle64.v v16, (a0)
+; CHECK-NEXT: lui a0, %hi(.LCPI40_0)
+; CHECK-NEXT: fld ft0, %lo(.LCPI40_0)(a0)
; CHECK-NEXT: vfadd.vv v8, v8, v16
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu
-; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, mu
; CHECK-NEXT: vfredsum.vs v25, v8, v25
; CHECK-NEXT: vfmv.f.s ft0, v25
diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
index f9f7cddd5469a..9c0199ba621c3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
@@ -9,8 +9,10 @@ declare half @llvm.vector.reduce.fadd.nxv1f16(half, <vscale x 1 x half>)
define half @vreduce_fadd_nxv1f16(<vscale x 1 x half> %v, half %s) {
; CHECK-LABEL: vreduce_fadd_nxv1f16:
; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, %hi(.LCPI0_0)
+; CHECK-NEXT: flh ft0, %lo(.LCPI0_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu
-; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu
; CHECK-NEXT: vfredsum.vs v25, v8, v25
; CHECK-NEXT: vfmv.f.s ft0, v25
@@ -38,8 +40,10 @@ declare half @llvm.vector.reduce.fadd.nxv2f16(half, <vscale x 2 x half>)
define half @vreduce_fadd_nxv2f16(<vscale x 2 x half> %v, half %s) {
; CHECK-LABEL: vreduce_fadd_nxv2f16:
; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, %hi(.LCPI2_0)
+; CHECK-NEXT: flh ft0, %lo(.LCPI2_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu
-; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu
; CHECK-NEXT: vfredsum.vs v25, v8, v25
; CHECK-NEXT: vfmv.f.s ft0, v25
@@ -67,8 +71,10 @@ declare half @llvm.vector.reduce.fadd.nxv4f16(half, <vscale x 4 x half>)
define half @vreduce_fadd_nxv4f16(<vscale x 4 x half> %v, half %s) {
; CHECK-LABEL: vreduce_fadd_nxv4f16:
; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, %hi(.LCPI4_0)
+; CHECK-NEXT: flh ft0, %lo(.LCPI4_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu
-; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: vfredsum.vs v25, v8, v25
; CHECK-NEXT: vfmv.f.s ft0, v25
; CHECK-NEXT: fadd.h fa0, fa0, ft0
@@ -94,8 +100,10 @@ declare float @llvm.vector.reduce.fadd.nxv1f32(float, <vscale x 1 x float>)
define float @vreduce_fadd_nxv1f32(<vscale x 1 x float> %v, float %s) {
; CHECK-LABEL: vreduce_fadd_nxv1f32:
; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, %hi(.LCPI6_0)
+; CHECK-NEXT: flw ft0, %lo(.LCPI6_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu
-; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, mu
; CHECK-NEXT: vfredsum.vs v25, v8, v25
; CHECK-NEXT: vfmv.f.s ft0, v25
@@ -123,8 +131,10 @@ declare float @llvm.vector.reduce.fadd.nxv2f32(float, <vscale x 2 x float>)
define float @vreduce_fadd_nxv2f32(<vscale x 2 x float> %v, float %s) {
; CHECK-LABEL: vreduce_fadd_nxv2f32:
; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, %hi(.LCPI8_0)
+; CHECK-NEXT: flw ft0, %lo(.LCPI8_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu
-; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: vfredsum.vs v25, v8, v25
; CHECK-NEXT: vfmv.f.s ft0, v25
; CHECK-NEXT: fadd.s fa0, fa0, ft0
@@ -150,8 +160,10 @@ declare float @llvm.vector.reduce.fadd.nxv4f32(float, <vscale x 4 x float>)
define float @vreduce_fadd_nxv4f32(<vscale x 4 x float> %v, float %s) {
; CHECK-LABEL: vreduce_fadd_nxv4f32:
; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, %hi(.LCPI10_0)
+; CHECK-NEXT: flw ft0, %lo(.LCPI10_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu
-; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu
; CHECK-NEXT: vfredsum.vs v25, v8, v25
; CHECK-NEXT: vfmv.f.s ft0, v25
@@ -179,8 +191,10 @@ declare double @llvm.vector.reduce.fadd.nxv1f64(double, <vscale x 1 x double>)
define double @vreduce_fadd_nxv1f64(<vscale x 1 x double> %v, double %s) {
; CHECK-LABEL: vreduce_fadd_nxv1f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, %hi(.LCPI12_0)
+; CHECK-NEXT: fld ft0, %lo(.LCPI12_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu
-; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: vfredsum.vs v25, v8, v25
; CHECK-NEXT: vfmv.f.s ft0, v25
; CHECK-NEXT: fadd.d fa0, fa0, ft0
@@ -206,8 +220,10 @@ declare double @llvm.vector.reduce.fadd.nxv2f64(double, <vscale x 2 x double>)
define double @vreduce_fadd_nxv2f64(<vscale x 2 x double> %v, double %s) {
; CHECK-LABEL: vreduce_fadd_nxv2f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, %hi(.LCPI14_0)
+; CHECK-NEXT: fld ft0, %lo(.LCPI14_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu
-; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu
; CHECK-NEXT: vfredsum.vs v25, v8, v25
; CHECK-NEXT: vfmv.f.s ft0, v25
@@ -235,8 +251,10 @@ declare double @llvm.vector.reduce.fadd.nxv4f64(double, <vscale x 4 x double>)
define double @vreduce_fadd_nxv4f64(<vscale x 4 x double> %v, double %s) {
; CHECK-LABEL: vreduce_fadd_nxv4f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, %hi(.LCPI16_0)
+; CHECK-NEXT: fld ft0, %lo(.LCPI16_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu
-; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vfmv.v.f v25, ft0
; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu
; CHECK-NEXT: vfredsum.vs v25, v8, v25
; CHECK-NEXT: vfmv.f.s ft0, v25