[llvm] 770fe86 - [SelectionDAG] Enable WidenVecOp_VECREDUCE for scalable vector
Lian Wang via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 23 19:44:12 PDT 2022
Author: Lian Wang
Date: 2022-06-24T02:32:53Z
New Revision: 770fe864feb34d078665bd51f5cd72025f5dcccd
URL: https://github.com/llvm/llvm-project/commit/770fe864feb34d078665bd51f5cd72025f5dcccd
DIFF: https://github.com/llvm/llvm-project/commit/770fe864feb34d078665bd51f5cd72025f5dcccd.diff
LOG: [SelectionDAG] Enable WidenVecOp_VECREDUCE for scalable vector
Reviewed By: craig.topper
Differential Revision: https://reviews.llvm.org/D128239
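
Note on the approach (not part of the commit message): for scalable vectors the total lane count is unknown at compile time, so the widened operand cannot be padded one lane at a time with INSERT_VECTOR_ELT. Instead, the new path in the hunk below fills the tail with INSERT_SUBVECTOR of a splat of the reduction's neutral element, where the subvector has GCD(OrigElts, WideElts) known-minimum elements so every insertion index stays a multiple of the subvector's minimum size. The standalone C++ sketch below (illustration only; the helper name paddingIndices is made up and is not LLVM API) reproduces that index arithmetic for element counts taken from the new tests:

    // Sketch of the padding-index arithmetic used by the scalable-vector
    // path in WidenVecOp_VECREDUCE. Counts are the *known minimum* element
    // counts of the scalable types (e.g. 3 for <vscale x 3 x half>).
    // Compile with -std=c++17 (std::gcd, structured bindings).
    #include <cstdio>
    #include <numeric>
    #include <vector>

    // Indices at which a GCD-sized splat of the neutral element would be
    // inserted (via INSERT_SUBVECTOR) to fill the widened vector's tail.
    static std::vector<unsigned> paddingIndices(unsigned OrigElts,
                                                unsigned WideElts) {
      unsigned GCD = std::gcd(OrigElts, WideElts);
      std::vector<unsigned> Indices;
      for (unsigned Idx = OrigElts; Idx < WideElts; Idx += GCD)
        Indices.push_back(Idx);
      return Indices;
    }

    int main() {
      // nxv3f16 -> nxv4f16, nxv6f16 -> nxv8f16, nxv10i8 -> nxv16i8.
      const std::pair<unsigned, unsigned> Cases[] = {{3, 4}, {6, 8}, {10, 16}};
      for (auto [Orig, Wide] : Cases) {
        std::printf("min elts %u widened to %u (subvector size %u):", Orig,
                    Wide, std::gcd(Orig, Wide));
        for (unsigned Idx : paddingIndices(Orig, Wide))
          std::printf(" %u", Idx);
        std::printf("\n");
      }
    }

For example, <vscale x 10 x i8> widens to <vscale x 16 x i8>; with GCD 2, three <vscale x 2 x i8> neutral-element splats are inserted at minimum indices 10, 12 and 14, which is consistent with the three rounds of unpack/uzp1 padding in the smin_nxv10i8 test below.
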
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
llvm/test/CodeGen/AArch64/sve-int-reduce.ll
llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index d497c1803d7b..1f8b7d48685b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -6122,8 +6122,20 @@ SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE(SDNode *N) {
assert(NeutralElem && "Neutral element must exist");
// Pad the vector with the neutral element.
- unsigned OrigElts = OrigVT.getVectorNumElements();
- unsigned WideElts = WideVT.getVectorNumElements();
+ unsigned OrigElts = OrigVT.getVectorMinNumElements();
+ unsigned WideElts = WideVT.getVectorMinNumElements();
+
+ if (WideVT.isScalableVector()) {
+ unsigned GCD = greatestCommonDivisor(OrigElts, WideElts);
+ EVT SplatVT = EVT::getVectorVT(*DAG.getContext(), ElemVT,
+ ElementCount::getScalable(GCD));
+ SDValue SplatNeutral = DAG.getSplatVector(SplatVT, dl, NeutralElem);
+ for (unsigned Idx = OrigElts; Idx < WideElts; Idx = Idx + GCD)
+ Op = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVT, Op, SplatNeutral,
+ DAG.getVectorIdxConstant(Idx, dl));
+ return DAG.getNode(Opc, dl, N->getValueType(0), Op, Flags);
+ }
+
for (unsigned Idx = OrigElts; Idx < WideElts; Idx++)
Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, WideVT, Op, NeutralElem,
DAG.getVectorIdxConstant(Idx, dl));
diff --git a/llvm/test/CodeGen/AArch64/sve-int-reduce.ll b/llvm/test/CodeGen/AArch64/sve-int-reduce.ll
index 0d3f605d6d8c..fea3c4aa455b 100644
--- a/llvm/test/CodeGen/AArch64/sve-int-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-int-reduce.ll
@@ -372,6 +372,85 @@ define i64 @smax_nxv2i64(<vscale x 2 x i64> %a) {
ret i64 %res
}
+; Test widen vector reduce type
+declare i8 @llvm.vector.reduce.smin.nxv10i8(<vscale x 10 x i8>)
+
+define i8 @smin_nxv10i8(<vscale x 10 x i8> %a) {
+; CHECK-LABEL: smin_nxv10i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uunpkhi z2.h, z0.b
+; CHECK-NEXT: mov z1.d, #127 // =0x7f
+; CHECK-NEXT: uunpklo z3.s, z2.h
+; CHECK-NEXT: uunpkhi z2.s, z2.h
+; CHECK-NEXT: uunpklo z3.d, z3.s
+; CHECK-NEXT: uunpklo z0.h, z0.b
+; CHECK-NEXT: uzp1 z3.s, z3.s, z1.s
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: uzp1 z2.h, z3.h, z2.h
+; CHECK-NEXT: uzp1 z2.b, z0.b, z2.b
+; CHECK-NEXT: uunpkhi z2.h, z2.b
+; CHECK-NEXT: uunpkhi z3.s, z2.h
+; CHECK-NEXT: uunpklo z2.s, z2.h
+; CHECK-NEXT: uunpkhi z3.d, z3.s
+; CHECK-NEXT: uzp1 z3.s, z1.s, z3.s
+; CHECK-NEXT: uzp1 z2.h, z2.h, z3.h
+; CHECK-NEXT: uzp1 z2.b, z0.b, z2.b
+; CHECK-NEXT: uunpkhi z2.h, z2.b
+; CHECK-NEXT: uunpkhi z3.s, z2.h
+; CHECK-NEXT: uunpklo z2.s, z2.h
+; CHECK-NEXT: uunpklo z3.d, z3.s
+; CHECK-NEXT: uzp1 z1.s, z3.s, z1.s
+; CHECK-NEXT: uzp1 z1.h, z2.h, z1.h
+; CHECK-NEXT: uzp1 z0.b, z0.b, z1.b
+; CHECK-NEXT: sminv b0, p0, z0.b
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+ %res = call i8 @llvm.vector.reduce.smin.nxv10i8(<vscale x 10 x i8> %a)
+ ret i8 %res
+}
+
+declare i8 @llvm.vector.reduce.add.nxv12i8(<vscale x 12 x i8>)
+
+define i8 @uaddv_nxv12i8(<vscale x 12 x i8> %a) {
+; CHECK-LABEL: uaddv_nxv12i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uunpkhi z2.h, z0.b
+; CHECK-NEXT: mov z1.s, #0 // =0x0
+; CHECK-NEXT: uunpklo z2.s, z2.h
+; CHECK-NEXT: uunpklo z0.h, z0.b
+; CHECK-NEXT: uzp1 z1.h, z2.h, z1.h
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: uzp1 z0.b, z0.b, z1.b
+; CHECK-NEXT: uaddv d0, p0, z0.b
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT: ret
+ %res = call i8 @llvm.vector.reduce.add.nxv12i8(<vscale x 12 x i8> %a)
+ ret i8 %res
+}
+
+declare i8 @llvm.vector.reduce.umax.nxv14i8(<vscale x 14 x i8>)
+
+define i8 @umax_nxv14i8(<vscale x 14 x i8> %a) {
+; CHECK-LABEL: umax_nxv14i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uunpkhi z2.h, z0.b
+; CHECK-NEXT: mov z1.d, #0 // =0x0
+; CHECK-NEXT: uunpkhi z3.s, z2.h
+; CHECK-NEXT: uunpklo z2.s, z2.h
+; CHECK-NEXT: uunpklo z3.d, z3.s
+; CHECK-NEXT: uunpklo z0.h, z0.b
+; CHECK-NEXT: uzp1 z1.s, z3.s, z1.s
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: uzp1 z1.h, z2.h, z1.h
+; CHECK-NEXT: uzp1 z0.b, z0.b, z1.b
+; CHECK-NEXT: umaxv b0, p0, z0.b
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+ %res = call i8 @llvm.vector.reduce.umax.nxv14i8(<vscale x 14 x i8> %a)
+ ret i8 %res
+}
+
declare i8 @llvm.vector.reduce.and.nxv16i8(<vscale x 16 x i8>)
declare i16 @llvm.vector.reduce.and.nxv8i16(<vscale x 8 x i16>)
declare i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32>)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
index 9dddd63f6cd0..b00dfc2a332f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
@@ -1146,3 +1146,97 @@ define half @vreduce_ord_fadd_nxv12f16(<vscale x 12 x half> %v, half %s) {
%red = call half @llvm.vector.reduce.fadd.nxv12f16(half %s, <vscale x 12 x half> %v)
ret half %red
}
+
+; Test Widen vector reduce type (fadd/fmin/fmax)
+define half @vreduce_fadd_nxv3f16(<vscale x 3 x half> %v, half %s) {
+; CHECK-LABEL: vreduce_fadd_nxv3f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: srli a0, a0, 3
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a1, a1, a0
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: fmv.h.x ft0, zero
+; CHECK-NEXT: fneg.h ft0, ft0
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; CHECK-NEXT: vfmv.v.f v9, ft0
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT: vslideup.vx v8, v9, a1
+; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, mu
+; CHECK-NEXT: vfmv.s.f v9, fa0
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; CHECK-NEXT: vfredusum.vs v8, v8, v9
+; CHECK-NEXT: vfmv.f.s fa0, v8
+; CHECK-NEXT: ret
+ %red = call reassoc half @llvm.vector.reduce.fadd.nxv3f16(half %s, <vscale x 3 x half> %v)
+ ret half %red
+}
+
+define half @vreduce_fadd_nxv6f16(<vscale x 6 x half> %v, half %s) {
+; CHECK-LABEL: vreduce_fadd_nxv6f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: srli a0, a0, 2
+; CHECK-NEXT: add a1, a0, a0
+; CHECK-NEXT: fmv.h.x ft0, zero
+; CHECK-NEXT: fneg.h ft0, ft0
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; CHECK-NEXT: vfmv.v.f v10, ft0
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu
+; CHECK-NEXT: vslideup.vx v9, v10, a0
+; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, mu
+; CHECK-NEXT: vfmv.s.f v10, fa0
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu
+; CHECK-NEXT: vfredusum.vs v8, v8, v10
+; CHECK-NEXT: vfmv.f.s fa0, v8
+; CHECK-NEXT: ret
+ %red = call reassoc half @llvm.vector.reduce.fadd.nxv6f16(half %s, <vscale x 6 x half> %v)
+ ret half %red
+}
+
+declare half @llvm.vector.reduce.fmin.nxv10f16(<vscale x 10 x half>)
+
+define half @vreduce_fmin_nxv10f16(<vscale x 10 x half> %v) {
+; CHECK-LABEL: vreduce_fmin_nxv10f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: lui a1, %hi(.LCPI73_0)
+; CHECK-NEXT: flh ft0, %lo(.LCPI73_0)(a1)
+; CHECK-NEXT: srli a0, a0, 2
+; CHECK-NEXT: add a1, a0, a0
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, mu
+; CHECK-NEXT: vfmv.v.f v12, ft0
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu
+; CHECK-NEXT: vslideup.vx v10, v12, a0
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT: vslideup.vi v11, v12, 0
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu
+; CHECK-NEXT: vslideup.vx v11, v12, a0
+; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, mu
+; CHECK-NEXT: vfmv.s.f v12, ft0
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu
+; CHECK-NEXT: vfredmin.vs v8, v8, v12
+; CHECK-NEXT: vfmv.f.s fa0, v8
+; CHECK-NEXT: ret
+ %red = call half @llvm.vector.reduce.fmin.nxv10f16(<vscale x 10 x half> %v)
+ ret half %red
+}
+
+declare half @llvm.vector.reduce.fmax.nxv12f16(<vscale x 12 x half>)
+
+define half @vreduce_fmax_nxv12f16(<vscale x 12 x half> %v) {
+; CHECK-LABEL: vreduce_fmax_nxv12f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, %hi(.LCPI74_0)
+; CHECK-NEXT: flh ft0, %lo(.LCPI74_0)(a0)
+; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, mu
+; CHECK-NEXT: vfmv.s.f v12, ft0
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; CHECK-NEXT: vfmv.v.f v11, ft0
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu
+; CHECK-NEXT: vfredmax.vs v8, v8, v12
+; CHECK-NEXT: vfmv.f.s fa0, v8
+; CHECK-NEXT: ret
+ %red = call half @llvm.vector.reduce.fmax.nxv12f16(<vscale x 12 x half> %v)
+ ret half %red
+}