[llvm] ab25e26 - [SelectionDAG] Enable WidenVecOp_VECREDUCE_SEQ for scalable vectors
Lian Wang via llvm-commits
llvm-commits at lists.llvm.org
Sun Jun 19 23:30:59 PDT 2022
Author: Lian Wang
Date: 2022-06-20T06:30:26Z
New Revision: ab25e263a99bfe1327c435cf6f637fca08d2aa43
URL: https://github.com/llvm/llvm-project/commit/ab25e263a99bfe1327c435cf6f637fca08d2aa43
DIFF: https://github.com/llvm/llvm-project/commit/ab25e263a99bfe1327c435cf6f637fca08d2aa43.diff
LOG: [SelectionDAG] Enable WidenVecOp_VECREDUCE_SEQ for scalable vectors
Reviewed By: sdesmalen
Differential Revision: https://reviews.llvm.org/D127710
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
llvm/test/CodeGen/AArch64/sve-fp-reduce.ll
llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 9ded7144a50bb..999d25581ba6c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -6144,8 +6144,20 @@ SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE_SEQ(SDNode *N) {
SDValue NeutralElem = DAG.getNeutralElement(BaseOpc, dl, ElemVT, Flags);
// Pad the vector with the neutral element.
- unsigned OrigElts = OrigVT.getVectorNumElements();
- unsigned WideElts = WideVT.getVectorNumElements();
+ unsigned OrigElts = OrigVT.getVectorMinNumElements();
+ unsigned WideElts = WideVT.getVectorMinNumElements();
+
+ if (WideVT.isScalableVector()) {
+ unsigned GCD = greatestCommonDivisor(OrigElts, WideElts);
+ EVT SplatVT = EVT::getVectorVT(*DAG.getContext(), ElemVT,
+ ElementCount::getScalable(GCD));
+ SDValue SplatNeutral = DAG.getSplatVector(SplatVT, dl, NeutralElem);
+ for (unsigned Idx = OrigElts; Idx < WideElts; Idx = Idx + GCD)
+ Op = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVT, Op, SplatNeutral,
+ DAG.getVectorIdxConstant(Idx, dl));
+ return DAG.getNode(Opc, dl, N->getValueType(0), AccOp, Op, Flags);
+ }
+
for (unsigned Idx = OrigElts; Idx < WideElts; Idx++)
Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, WideVT, Op, NeutralElem,
DAG.getVectorIdxConstant(Idx, dl));
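
For scalable types, the per-element INSERT_VECTOR_ELT loop kept above for fixed-length
types is unusable: the lanes that need the neutral element form the runtime range
[OrigElts*vscale, WideElts*vscale), which constant-index element inserts cannot reach
(and the old getVectorNumElements calls are invalid on scalable types to begin with).
The new path instead splats the neutral element into a chunk of GCD(OrigElts, WideElts)
elements and inserts it with INSERT_SUBVECTOR; for scalable operands an insert at index
Idx covers the runtime lanes [Idx*vscale, (Idx+GCD)*vscale). As an illustrative IR
sketch, not part of the patch (the function name is invented, and llvm.vector.insert
stands in for the ISD::INSERT_SUBVECTOR node), the nxv6f16 case from the AArch64 test
below widens to nxv8f16 with GCD(6, 8) = 2, roughly:

declare <vscale x 8 x half> @llvm.vector.insert.nxv8f16.nxv6f16(<vscale x 8 x half>, <vscale x 6 x half>, i64)
declare <vscale x 8 x half> @llvm.vector.insert.nxv8f16.nxv2f16(<vscale x 8 x half>, <vscale x 2 x half>, i64)
declare half @llvm.vector.reduce.fadd.nxv8f16(half, <vscale x 8 x half>)

define half @fadda_nxv6f16_widened_sketch(<vscale x 6 x half> %v, half %s) {
  ; Move the odd-sized operand into the wide type; the tail lanes are poison.
  %wide = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.nxv6f16(<vscale x 8 x half> poison, <vscale x 6 x half> %v, i64 0)
  ; Splat the fadd neutral element -0.0 (0xH8000) into a GCD(6, 8) = 2 element chunk.
  %head = insertelement <vscale x 2 x half> poison, half 0xH8000, i32 0
  %neutral = shufflevector <vscale x 2 x half> %head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
  ; Single loop iteration: Idx = 6 pads the runtime lanes [6*vscale, 8*vscale).
  %padded = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.nxv2f16(<vscale x 8 x half> %wide, <vscale x 2 x half> %neutral, i64 6)
  ; x + -0.0 == x for every x, so the ordered reduction result is unchanged.
  %red = call half @llvm.vector.reduce.fadd.nxv8f16(half %s, <vscale x 8 x half> %padded)
  ret half %red
}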
diff --git a/llvm/test/CodeGen/AArch64/sve-fp-reduce.ll b/llvm/test/CodeGen/AArch64/sve-fp-reduce.ll
index b0a902f197c00..1ab2603b4b8e3 100644
--- a/llvm/test/CodeGen/AArch64/sve-fp-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fp-reduce.ll
@@ -29,6 +29,79 @@ define half @fadda_nxv8f16(half %init, <vscale x 8 x half> %a) {
ret half %res
}
+define half @fadda_nxv6f16(<vscale x 6 x half> %v, half %s) {
+; CHECK-LABEL: fadda_nxv6f16:
+; CHECK: str x29, [sp, #-16]!
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22
+; CHECK-NEXT: adrp x8, .LCPI3_0
+; CHECK-NEXT: add x8, x8, :lo12:.LCPI3_0
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: ptrue p1.d
+; CHECK-NEXT: st1h { z0.h }, p0, [sp]
+; CHECK-NEXT: ld1rh { z0.d }, p1/z, [x8]
+; CHECK-NEXT: st1h { z0.d }, p1, [sp, #3, mul vl]
+; CHECK-NEXT: fmov s0, s1
+; CHECK-NEXT: ld1h { z2.h }, p0/z, [sp]
+; CHECK-NEXT: fadda h0, p0, h0, z2.h
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: ldr x29, [sp], #16
+; CHECK-NEXT: ret
+ %res = call half @llvm.vector.reduce.fadd.nxv6f16(half %s, <vscale x 6 x half> %v)
+ ret half %res
+}
+
+define half @fadda_nxv10f16(<vscale x 10 x half> %v, half %s) {
+; CHECK-LABEL: fadda_nxv10f16:
+; CHECK: str x29, [sp, #-16]!
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: addvl sp, sp, #-3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22
+; CHECK-NEXT: adrp x8, .LCPI4_0
+; CHECK-NEXT: add x8, x8, :lo12:.LCPI4_0
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: ptrue p1.d
+; CHECK-NEXT: st1h { z1.h }, p0, [sp]
+; CHECK-NEXT: ld1rh { z1.d }, p1/z, [x8]
+; CHECK-NEXT: addvl x8, sp, #1
+; CHECK-NEXT: fadda h2, p0, h2, z0.h
+; CHECK-NEXT: st1h { z1.d }, p1, [sp, #1, mul vl]
+; CHECK-NEXT: ld1h { z3.h }, p0/z, [sp]
+; CHECK-NEXT: st1h { z3.h }, p0, [sp, #1, mul vl]
+; CHECK-NEXT: st1h { z1.d }, p1, [sp, #6, mul vl]
+; CHECK-NEXT: ld1h { z3.h }, p0/z, [sp, #1, mul vl]
+; CHECK-NEXT: st1h { z3.h }, p0, [sp, #2, mul vl]
+; CHECK-NEXT: st1h { z1.d }, p1, [x8, #7, mul vl]
+; CHECK-NEXT: ld1h { z1.h }, p0/z, [sp, #2, mul vl]
+; CHECK-NEXT: fadda h2, p0, h2, z1.h
+; CHECK-NEXT: fmov s0, s2
+; CHECK-NEXT: addvl sp, sp, #3
+; CHECK-NEXT: ldr x29, [sp], #16
+; CHECK-NEXT: ret
+ %res = call half @llvm.vector.reduce.fadd.nxv10f16(half %s, <vscale x 10 x half> %v)
+ ret half %res
+}
+
+define half @fadda_nxv12f16(<vscale x 12 x half> %v, half %s) {
+; CHECK-LABEL: fadda_nxv12f16:
+; CHECK: adrp x8, .LCPI5_0
+; CHECK-NEXT: add x8, x8, :lo12:.LCPI5_0
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: uunpklo z1.s, z1.h
+; CHECK-NEXT: ld1rh { z3.s }, p0/z, [x8]
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: fadda h2, p0, h2, z0.h
+; CHECK-NEXT: uzp1 z1.h, z1.h, z3.h
+; CHECK-NEXT: fadda h2, p0, h2, z1.h
+; CHECK-NEXT: fmov s0, s2
+; CHECK-NEXT: ret
+ %res = call half @llvm.vector.reduce.fadd.nxv12f16(half %s, <vscale x 12 x half> %v)
+ ret half %res
+}
+
define float @fadda_nxv2f32(float %init, <vscale x 2 x float> %a) {
; CHECK-LABEL: fadda_nxv2f32:
; CHECK: ptrue p0.d
@@ -233,6 +306,9 @@ define double @fminv_nxv2f64(<vscale x 2 x double> %a) {
declare half @llvm.vector.reduce.fadd.nxv2f16(half, <vscale x 2 x half>)
declare half @llvm.vector.reduce.fadd.nxv4f16(half, <vscale x 4 x half>)
declare half @llvm.vector.reduce.fadd.nxv8f16(half, <vscale x 8 x half>)
+declare half @llvm.vector.reduce.fadd.nxv6f16(half, <vscale x 6 x half>)
+declare half @llvm.vector.reduce.fadd.nxv10f16(half, <vscale x 10 x half>)
+declare half @llvm.vector.reduce.fadd.nxv12f16(half, <vscale x 12 x half>)
declare float @llvm.vector.reduce.fadd.nxv2f32(float, <vscale x 2 x float>)
declare float @llvm.vector.reduce.fadd.nxv4f32(float, <vscale x 4 x float>)
declare double @llvm.vector.reduce.fadd.nxv2f64(double, <vscale x 2 x double>)
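
In the AArch64 output above, the padding for fadda_nxv6f16 and fadda_nxv10f16 goes
through the stack (the st1h { z1.d } stores write out the <vscale x 2 x half> neutral
chunks), while fadda_nxv12f16, whose GCD(12, 16) = 4 chunk matches an unpacked register
layout, appears to stay in registers via uunpklo/uzp1. nxv10f16 widens to nxv16f16 with
GCD(10, 16) = 2, so the loop emits three subvector inserts, Idx = 10, 12, and 14,
matching the three chunk stores in fadda_nxv10f16. A hedged IR sketch of that shape
(invented function name, llvm.vector.insert again standing in for INSERT_SUBVECTOR):

declare <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv10f16(<vscale x 16 x half>, <vscale x 10 x half>, i64)
declare <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv2f16(<vscale x 16 x half>, <vscale x 2 x half>, i64)
declare half @llvm.vector.reduce.fadd.nxv16f16(half, <vscale x 16 x half>)

define half @fadda_nxv10f16_widened_sketch(<vscale x 10 x half> %v, half %s) {
  %wide = call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv10f16(<vscale x 16 x half> poison, <vscale x 10 x half> %v, i64 0)
  %head = insertelement <vscale x 2 x half> poison, half 0xH8000, i32 0
  %neutral = shufflevector <vscale x 2 x half> %head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
  ; Three loop iterations: Idx = 10, 12, 14, each filling 2*vscale lanes.
  %p0 = call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv2f16(<vscale x 16 x half> %wide, <vscale x 2 x half> %neutral, i64 10)
  %p1 = call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv2f16(<vscale x 16 x half> %p0, <vscale x 2 x half> %neutral, i64 12)
  %p2 = call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv2f16(<vscale x 16 x half> %p1, <vscale x 2 x half> %neutral, i64 14)
  %red = call half @llvm.vector.reduce.fadd.nxv16f16(half %s, <vscale x 16 x half> %p2)
  ret half %red
}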
diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
index ecff79661b9ae..9dddd63f6cd0c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v,+m -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v,+m -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s
declare half @llvm.vector.reduce.fadd.nxv1f16(half, <vscale x 1 x half>)
@@ -1048,3 +1048,101 @@ define float @vreduce_nsz_fadd_nxv1f32(<vscale x 1 x float> %v, float %s) {
%red = call reassoc nsz float @llvm.vector.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v)
ret float %red
}
+
+; Test Widen VECREDUCE_SEQ_FADD
+declare half @llvm.vector.reduce.fadd.nxv3f16(half, <vscale x 3 x half>)
+
+define half @vreduce_ord_fadd_nxv3f16(<vscale x 3 x half> %v, half %s) {
+; CHECK-LABEL: vreduce_ord_fadd_nxv3f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: srli a0, a0, 3
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a1, a1, a0
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: fmv.h.x ft0, zero
+; CHECK-NEXT: fneg.h ft0, ft0
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; CHECK-NEXT: vfmv.v.f v9, ft0
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT: vslideup.vx v8, v9, a1
+; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, mu
+; CHECK-NEXT: vfmv.s.f v9, fa0
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; CHECK-NEXT: vfredosum.vs v8, v8, v9
+; CHECK-NEXT: vfmv.f.s fa0, v8
+; CHECK-NEXT: ret
+ %red = call half @llvm.vector.reduce.fadd.nxv3f16(half %s, <vscale x 3 x half> %v)
+ ret half %red
+}
+
+declare half @llvm.vector.reduce.fadd.nxv6f16(half, <vscale x 6 x half>)
+
+define half @vreduce_ord_fadd_nxv6f16(<vscale x 6 x half> %v, half %s) {
+; CHECK-LABEL: vreduce_ord_fadd_nxv6f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: srli a0, a0, 2
+; CHECK-NEXT: add a1, a0, a0
+; CHECK-NEXT: fmv.h.x ft0, zero
+; CHECK-NEXT: fneg.h ft0, ft0
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; CHECK-NEXT: vfmv.v.f v10, ft0
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu
+; CHECK-NEXT: vslideup.vx v9, v10, a0
+; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, mu
+; CHECK-NEXT: vfmv.s.f v10, fa0
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu
+; CHECK-NEXT: vfredosum.vs v8, v8, v10
+; CHECK-NEXT: vfmv.f.s fa0, v8
+; CHECK-NEXT: ret
+ %red = call half @llvm.vector.reduce.fadd.nxv6f16(half %s, <vscale x 6 x half> %v)
+ ret half %red
+}
+
+declare half @llvm.vector.reduce.fadd.nxv10f16(half, <vscale x 10 x half>)
+
+define half @vreduce_ord_fadd_nxv10f16(<vscale x 10 x half> %v, half %s) {
+; CHECK-LABEL: vreduce_ord_fadd_nxv10f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: srli a0, a0, 2
+; CHECK-NEXT: add a1, a0, a0
+; CHECK-NEXT: fmv.h.x ft0, zero
+; CHECK-NEXT: fneg.h ft0, ft0
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; CHECK-NEXT: vfmv.v.f v12, ft0
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu
+; CHECK-NEXT: vslideup.vx v10, v12, a0
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT: vslideup.vi v11, v12, 0
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu
+; CHECK-NEXT: vslideup.vx v11, v12, a0
+; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, mu
+; CHECK-NEXT: vfmv.s.f v12, fa0
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu
+; CHECK-NEXT: vfredosum.vs v8, v8, v12
+; CHECK-NEXT: vfmv.f.s fa0, v8
+; CHECK-NEXT: ret
+ %red = call half @llvm.vector.reduce.fadd.nxv10f16(half %s, <vscale x 10 x half> %v)
+ ret half %red
+}
+
+declare half @llvm.vector.reduce.fadd.nxv12f16(half, <vscale x 12 x half>)
+
+define half @vreduce_ord_fadd_nxv12f16(<vscale x 12 x half> %v, half %s) {
+; CHECK-LABEL: vreduce_ord_fadd_nxv12f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, mu
+; CHECK-NEXT: vfmv.s.f v12, fa0
+; CHECK-NEXT: fmv.h.x ft0, zero
+; CHECK-NEXT: fneg.h ft0, ft0
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; CHECK-NEXT: vfmv.v.f v11, ft0
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu
+; CHECK-NEXT: vfredosum.vs v8, v8, v12
+; CHECK-NEXT: vfmv.f.s fa0, v8
+; CHECK-NEXT: ret
+ %red = call half @llvm.vector.reduce.fadd.nxv12f16(half %s, <vscale x 12 x half> %v)
+ ret half %red
+}
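
On RISC-V the scalable INSERT_SUBVECTOR lowers to vslideup.vx with a vlenb-derived
offset, so the padding stays in registers: in vreduce_ord_fadd_nxv3f16 above, a1 is
computed as 3 * (vlenb >> 3), i.e. the 3*vscale insert position (+m is added to the
RUN lines presumably because some of these vlenb-scaled index computations need an
integer multiply). nxv3f16 widens to nxv4f16 with GCD(3, 4) = 1, the degenerate case
where each padded chunk is a single scalable element. A hedged IR sketch (invented
function name):

declare <vscale x 4 x half> @llvm.vector.insert.nxv4f16.nxv3f16(<vscale x 4 x half>, <vscale x 3 x half>, i64)
declare <vscale x 4 x half> @llvm.vector.insert.nxv4f16.nxv1f16(<vscale x 4 x half>, <vscale x 1 x half>, i64)
declare half @llvm.vector.reduce.fadd.nxv4f16(half, <vscale x 4 x half>)

define half @vreduce_ord_fadd_nxv3f16_sketch(<vscale x 3 x half> %v, half %s) {
  %wide = call <vscale x 4 x half> @llvm.vector.insert.nxv4f16.nxv3f16(<vscale x 4 x half> poison, <vscale x 3 x half> %v, i64 0)
  ; GCD(3, 4) = 1: the neutral chunk is a single scalable element.
  %head = insertelement <vscale x 1 x half> poison, half 0xH8000, i32 0
  %neutral = shufflevector <vscale x 1 x half> %head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
  ; One iteration: Idx = 3 pads the runtime lanes [3*vscale, 4*vscale).
  %padded = call <vscale x 4 x half> @llvm.vector.insert.nxv4f16.nxv1f16(<vscale x 4 x half> %wide, <vscale x 1 x half> %neutral, i64 3)
  %red = call half @llvm.vector.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %padded)
  ret half %red
}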