[llvm] 96ab083 - [RISCV] Support VECTOR_REVERSE mask operation.
Lian Wang via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 28 00:58:18 PDT 2022
Author: Lian Wang
Date: 2022-06-28T07:48:51Z
New Revision: 96ab083622f5dc4b19feefbd3dd1882926b60bb5
URL: https://github.com/llvm/llvm-project/commit/96ab083622f5dc4b19feefbd3dd1882926b60bb5
DIFF: https://github.com/llvm/llvm-project/commit/96ab083622f5dc4b19feefbd3dd1882926b60bb5.diff
LOG: [RISCV] Support VECTOR_REVERSE mask operation.
Reviewed By: craig.topper
Differential Revision: https://reviews.llvm.org/D128627
Added:
Modified:
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 72b1d4ce82d7..27fbc93fa790 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -525,6 +525,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(
{ISD::VP_FPTOSI, ISD::VP_FPTOUI, ISD::VP_TRUNCATE, ISD::VP_SETCC}, VT,
Custom);
+ setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
}
for (MVT VT : IntVecVTs) {
@@ -5638,6 +5639,12 @@ SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
MVT VecVT = Op.getSimpleValueType();
+ if (VecVT.getVectorElementType() == MVT::i1) {
+ MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
+ SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, Op.getOperand(0));
+ SDValue Op2 = DAG.getNode(ISD::VECTOR_REVERSE, DL, WidenVT, Op1);
+ return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Op2);
+ }
unsigned EltSize = VecVT.getScalarSizeInBits();
unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
diff --git a/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll
index 43aaf18aef8d..618537b5281f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll
@@ -9,38 +9,600 @@
;
; VECTOR_REVERSE - masks
;
-; FIXME: Support for mask vectors
-;define <vscale x 2 x i1> @reverse_nxv2i1(<vscale x 2 x i1> %a) {
-; %res = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %a)
-; ret <vscale x 2 x i1> %res
-;}
-;
-;define <vscale x 4 x i1> @reverse_nxv4i1(<vscale x 4 x i1> %a) {
-; %res = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %a)
-; ret <vscale x 4 x i1> %res
-;}
-;
-;define <vscale x 8 x i1> @reverse_nxv8i1(<vscale x 8 x i1> %a) {
-; %res = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> %a)
-; ret <vscale x 8 x i1> %res
-;}
-;
-;define <vscale x 16 x i1> @reverse_nxv16i1(<vscale x 16 x i1> %a) {
-; %res = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> %a)
-; ret <vscale x 16 x i1> %res
-;}
-;
-;define <vscale x 32 x i1> @reverse_nxv32i1(<vscale x 32 x i1> %a) {
-; %res = call <vscale x 32 x i1> @llvm.experimental.vector.reverse.nxv32i1(<vscale x 32 x i1> %a)
-; ret <vscale x 32 x i1> %res
-;}
-;
-;define <vscale x 64 x i1> @reverse_nxv64i1(<vscale x 64 x i1> %a) {
-; %res = call <vscale x 64 x i1> @llvm.experimental.vector.reverse.nxv64i1(<vscale x 64 x i1> %a)
-; ret <vscale x 64 x i1> %res
-;}
+define <vscale x 2 x i1> @reverse_nxv2i1(<vscale x 2 x i1> %a) {
+; RV32-BITS-UNKNOWN-LABEL: reverse_nxv2i1:
+; RV32-BITS-UNKNOWN: # %bb.0:
+; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, mf4, ta, mu
+; RV32-BITS-UNKNOWN-NEXT: vmv.v.i v8, 0
+; RV32-BITS-UNKNOWN-NEXT: vmerge.vim v8, v8, 1, v0
+; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb
+; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 2
+; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, -1
+; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
+; RV32-BITS-UNKNOWN-NEXT: vid.v v9
+; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v9, v9, a0
+; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, mf4, ta, mu
+; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v10, v8, v9
+; RV32-BITS-UNKNOWN-NEXT: vand.vi v8, v10, 1
+; RV32-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0
+; RV32-BITS-UNKNOWN-NEXT: ret
+;
+; RV32-BITS-256-LABEL: reverse_nxv2i1:
+; RV32-BITS-256: # %bb.0:
+; RV32-BITS-256-NEXT: vsetvli a0, zero, e8, mf4, ta, mu
+; RV32-BITS-256-NEXT: vmv.v.i v8, 0
+; RV32-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0
+; RV32-BITS-256-NEXT: csrr a0, vlenb
+; RV32-BITS-256-NEXT: srli a0, a0, 2
+; RV32-BITS-256-NEXT: addi a0, a0, -1
+; RV32-BITS-256-NEXT: vid.v v9
+; RV32-BITS-256-NEXT: vrsub.vx v9, v9, a0
+; RV32-BITS-256-NEXT: vrgather.vv v10, v8, v9
+; RV32-BITS-256-NEXT: vand.vi v8, v10, 1
+; RV32-BITS-256-NEXT: vmsne.vi v0, v8, 0
+; RV32-BITS-256-NEXT: ret
+;
+; RV32-BITS-512-LABEL: reverse_nxv2i1:
+; RV32-BITS-512: # %bb.0:
+; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, mf4, ta, mu
+; RV32-BITS-512-NEXT: vmv.v.i v8, 0
+; RV32-BITS-512-NEXT: vmerge.vim v8, v8, 1, v0
+; RV32-BITS-512-NEXT: csrr a0, vlenb
+; RV32-BITS-512-NEXT: srli a0, a0, 2
+; RV32-BITS-512-NEXT: addi a0, a0, -1
+; RV32-BITS-512-NEXT: vid.v v9
+; RV32-BITS-512-NEXT: vrsub.vx v9, v9, a0
+; RV32-BITS-512-NEXT: vrgather.vv v10, v8, v9
+; RV32-BITS-512-NEXT: vand.vi v8, v10, 1
+; RV32-BITS-512-NEXT: vmsne.vi v0, v8, 0
+; RV32-BITS-512-NEXT: ret
+;
+; RV64-BITS-UNKNOWN-LABEL: reverse_nxv2i1:
+; RV64-BITS-UNKNOWN: # %bb.0:
+; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, mf4, ta, mu
+; RV64-BITS-UNKNOWN-NEXT: vmv.v.i v8, 0
+; RV64-BITS-UNKNOWN-NEXT: vmerge.vim v8, v8, 1, v0
+; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb
+; RV64-BITS-UNKNOWN-NEXT: srli a0, a0, 2
+; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1
+; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
+; RV64-BITS-UNKNOWN-NEXT: vid.v v9
+; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v9, v9, a0
+; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, mf4, ta, mu
+; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v10, v8, v9
+; RV64-BITS-UNKNOWN-NEXT: vand.vi v8, v10, 1
+; RV64-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0
+; RV64-BITS-UNKNOWN-NEXT: ret
+;
+; RV64-BITS-256-LABEL: reverse_nxv2i1:
+; RV64-BITS-256: # %bb.0:
+; RV64-BITS-256-NEXT: vsetvli a0, zero, e8, mf4, ta, mu
+; RV64-BITS-256-NEXT: vmv.v.i v8, 0
+; RV64-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0
+; RV64-BITS-256-NEXT: csrr a0, vlenb
+; RV64-BITS-256-NEXT: srli a0, a0, 2
+; RV64-BITS-256-NEXT: addi a0, a0, -1
+; RV64-BITS-256-NEXT: vid.v v9
+; RV64-BITS-256-NEXT: vrsub.vx v9, v9, a0
+; RV64-BITS-256-NEXT: vrgather.vv v10, v8, v9
+; RV64-BITS-256-NEXT: vand.vi v8, v10, 1
+; RV64-BITS-256-NEXT: vmsne.vi v0, v8, 0
+; RV64-BITS-256-NEXT: ret
+;
+; RV64-BITS-512-LABEL: reverse_nxv2i1:
+; RV64-BITS-512: # %bb.0:
+; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, mf4, ta, mu
+; RV64-BITS-512-NEXT: vmv.v.i v8, 0
+; RV64-BITS-512-NEXT: vmerge.vim v8, v8, 1, v0
+; RV64-BITS-512-NEXT: csrr a0, vlenb
+; RV64-BITS-512-NEXT: srli a0, a0, 2
+; RV64-BITS-512-NEXT: addi a0, a0, -1
+; RV64-BITS-512-NEXT: vid.v v9
+; RV64-BITS-512-NEXT: vrsub.vx v9, v9, a0
+; RV64-BITS-512-NEXT: vrgather.vv v10, v8, v9
+; RV64-BITS-512-NEXT: vand.vi v8, v10, 1
+; RV64-BITS-512-NEXT: vmsne.vi v0, v8, 0
+; RV64-BITS-512-NEXT: ret
+ %res = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %a)
+ ret <vscale x 2 x i1> %res
+}
+
+define <vscale x 4 x i1> @reverse_nxv4i1(<vscale x 4 x i1> %a) {
+; RV32-BITS-UNKNOWN-LABEL: reverse_nxv4i1:
+; RV32-BITS-UNKNOWN: # %bb.0:
+; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, mf2, ta, mu
+; RV32-BITS-UNKNOWN-NEXT: vmv.v.i v8, 0
+; RV32-BITS-UNKNOWN-NEXT: vmerge.vim v8, v8, 1, v0
+; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb
+; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 1
+; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, -1
+; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, m1, ta, mu
+; RV32-BITS-UNKNOWN-NEXT: vid.v v9
+; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v9, v9, a0
+; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
+; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v10, v8, v9
+; RV32-BITS-UNKNOWN-NEXT: vand.vi v8, v10, 1
+; RV32-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0
+; RV32-BITS-UNKNOWN-NEXT: ret
+;
+; RV32-BITS-256-LABEL: reverse_nxv4i1:
+; RV32-BITS-256: # %bb.0:
+; RV32-BITS-256-NEXT: vsetvli a0, zero, e8, mf2, ta, mu
+; RV32-BITS-256-NEXT: vmv.v.i v8, 0
+; RV32-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0
+; RV32-BITS-256-NEXT: csrr a0, vlenb
+; RV32-BITS-256-NEXT: srli a0, a0, 1
+; RV32-BITS-256-NEXT: addi a0, a0, -1
+; RV32-BITS-256-NEXT: vid.v v9
+; RV32-BITS-256-NEXT: vrsub.vx v9, v9, a0
+; RV32-BITS-256-NEXT: vrgather.vv v10, v8, v9
+; RV32-BITS-256-NEXT: vand.vi v8, v10, 1
+; RV32-BITS-256-NEXT: vmsne.vi v0, v8, 0
+; RV32-BITS-256-NEXT: ret
+;
+; RV32-BITS-512-LABEL: reverse_nxv4i1:
+; RV32-BITS-512: # %bb.0:
+; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, mf2, ta, mu
+; RV32-BITS-512-NEXT: vmv.v.i v8, 0
+; RV32-BITS-512-NEXT: vmerge.vim v8, v8, 1, v0
+; RV32-BITS-512-NEXT: csrr a0, vlenb
+; RV32-BITS-512-NEXT: srli a0, a0, 1
+; RV32-BITS-512-NEXT: addi a0, a0, -1
+; RV32-BITS-512-NEXT: vid.v v9
+; RV32-BITS-512-NEXT: vrsub.vx v9, v9, a0
+; RV32-BITS-512-NEXT: vrgather.vv v10, v8, v9
+; RV32-BITS-512-NEXT: vand.vi v8, v10, 1
+; RV32-BITS-512-NEXT: vmsne.vi v0, v8, 0
+; RV32-BITS-512-NEXT: ret
+;
+; RV64-BITS-UNKNOWN-LABEL: reverse_nxv4i1:
+; RV64-BITS-UNKNOWN: # %bb.0:
+; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, mf2, ta, mu
+; RV64-BITS-UNKNOWN-NEXT: vmv.v.i v8, 0
+; RV64-BITS-UNKNOWN-NEXT: vmerge.vim v8, v8, 1, v0
+; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb
+; RV64-BITS-UNKNOWN-NEXT: srli a0, a0, 1
+; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1
+; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, m1, ta, mu
+; RV64-BITS-UNKNOWN-NEXT: vid.v v9
+; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v9, v9, a0
+; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
+; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v10, v8, v9
+; RV64-BITS-UNKNOWN-NEXT: vand.vi v8, v10, 1
+; RV64-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0
+; RV64-BITS-UNKNOWN-NEXT: ret
+;
+; RV64-BITS-256-LABEL: reverse_nxv4i1:
+; RV64-BITS-256: # %bb.0:
+; RV64-BITS-256-NEXT: vsetvli a0, zero, e8, mf2, ta, mu
+; RV64-BITS-256-NEXT: vmv.v.i v8, 0
+; RV64-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0
+; RV64-BITS-256-NEXT: csrr a0, vlenb
+; RV64-BITS-256-NEXT: srli a0, a0, 1
+; RV64-BITS-256-NEXT: addi a0, a0, -1
+; RV64-BITS-256-NEXT: vid.v v9
+; RV64-BITS-256-NEXT: vrsub.vx v9, v9, a0
+; RV64-BITS-256-NEXT: vrgather.vv v10, v8, v9
+; RV64-BITS-256-NEXT: vand.vi v8, v10, 1
+; RV64-BITS-256-NEXT: vmsne.vi v0, v8, 0
+; RV64-BITS-256-NEXT: ret
+;
+; RV64-BITS-512-LABEL: reverse_nxv4i1:
+; RV64-BITS-512: # %bb.0:
+; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, mf2, ta, mu
+; RV64-BITS-512-NEXT: vmv.v.i v8, 0
+; RV64-BITS-512-NEXT: vmerge.vim v8, v8, 1, v0
+; RV64-BITS-512-NEXT: csrr a0, vlenb
+; RV64-BITS-512-NEXT: srli a0, a0, 1
+; RV64-BITS-512-NEXT: addi a0, a0, -1
+; RV64-BITS-512-NEXT: vid.v v9
+; RV64-BITS-512-NEXT: vrsub.vx v9, v9, a0
+; RV64-BITS-512-NEXT: vrgather.vv v10, v8, v9
+; RV64-BITS-512-NEXT: vand.vi v8, v10, 1
+; RV64-BITS-512-NEXT: vmsne.vi v0, v8, 0
+; RV64-BITS-512-NEXT: ret
+ %res = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %a)
+ ret <vscale x 4 x i1> %res
+}
+
+define <vscale x 8 x i1> @reverse_nxv8i1(<vscale x 8 x i1> %a) {
+; RV32-BITS-UNKNOWN-LABEL: reverse_nxv8i1:
+; RV32-BITS-UNKNOWN: # %bb.0:
+; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m1, ta, mu
+; RV32-BITS-UNKNOWN-NEXT: vmv.v.i v8, 0
+; RV32-BITS-UNKNOWN-NEXT: vmerge.vim v8, v8, 1, v0
+; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb
+; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, -1
+; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, m2, ta, mu
+; RV32-BITS-UNKNOWN-NEXT: vid.v v10
+; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v10, v10, a0
+; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, m1, ta, mu
+; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v9, v8, v10
+; RV32-BITS-UNKNOWN-NEXT: vand.vi v8, v9, 1
+; RV32-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0
+; RV32-BITS-UNKNOWN-NEXT: ret
+;
+; RV32-BITS-256-LABEL: reverse_nxv8i1:
+; RV32-BITS-256: # %bb.0:
+; RV32-BITS-256-NEXT: vsetvli a0, zero, e8, m1, ta, mu
+; RV32-BITS-256-NEXT: vmv.v.i v8, 0
+; RV32-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0
+; RV32-BITS-256-NEXT: csrr a0, vlenb
+; RV32-BITS-256-NEXT: addi a0, a0, -1
+; RV32-BITS-256-NEXT: vid.v v9
+; RV32-BITS-256-NEXT: vrsub.vx v9, v9, a0
+; RV32-BITS-256-NEXT: vrgather.vv v10, v8, v9
+; RV32-BITS-256-NEXT: vand.vi v8, v10, 1
+; RV32-BITS-256-NEXT: vmsne.vi v0, v8, 0
+; RV32-BITS-256-NEXT: ret
+;
+; RV32-BITS-512-LABEL: reverse_nxv8i1:
+; RV32-BITS-512: # %bb.0:
+; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, m1, ta, mu
+; RV32-BITS-512-NEXT: vmv.v.i v8, 0
+; RV32-BITS-512-NEXT: vmerge.vim v8, v8, 1, v0
+; RV32-BITS-512-NEXT: csrr a0, vlenb
+; RV32-BITS-512-NEXT: addi a0, a0, -1
+; RV32-BITS-512-NEXT: vid.v v9
+; RV32-BITS-512-NEXT: vrsub.vx v9, v9, a0
+; RV32-BITS-512-NEXT: vrgather.vv v10, v8, v9
+; RV32-BITS-512-NEXT: vand.vi v8, v10, 1
+; RV32-BITS-512-NEXT: vmsne.vi v0, v8, 0
+; RV32-BITS-512-NEXT: ret
+;
+; RV64-BITS-UNKNOWN-LABEL: reverse_nxv8i1:
+; RV64-BITS-UNKNOWN: # %bb.0:
+; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m1, ta, mu
+; RV64-BITS-UNKNOWN-NEXT: vmv.v.i v8, 0
+; RV64-BITS-UNKNOWN-NEXT: vmerge.vim v8, v8, 1, v0
+; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb
+; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1
+; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, m2, ta, mu
+; RV64-BITS-UNKNOWN-NEXT: vid.v v10
+; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v10, v10, a0
+; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, m1, ta, mu
+; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v9, v8, v10
+; RV64-BITS-UNKNOWN-NEXT: vand.vi v8, v9, 1
+; RV64-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0
+; RV64-BITS-UNKNOWN-NEXT: ret
+;
+; RV64-BITS-256-LABEL: reverse_nxv8i1:
+; RV64-BITS-256: # %bb.0:
+; RV64-BITS-256-NEXT: vsetvli a0, zero, e8, m1, ta, mu
+; RV64-BITS-256-NEXT: vmv.v.i v8, 0
+; RV64-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0
+; RV64-BITS-256-NEXT: csrr a0, vlenb
+; RV64-BITS-256-NEXT: addi a0, a0, -1
+; RV64-BITS-256-NEXT: vid.v v9
+; RV64-BITS-256-NEXT: vrsub.vx v9, v9, a0
+; RV64-BITS-256-NEXT: vrgather.vv v10, v8, v9
+; RV64-BITS-256-NEXT: vand.vi v8, v10, 1
+; RV64-BITS-256-NEXT: vmsne.vi v0, v8, 0
+; RV64-BITS-256-NEXT: ret
+;
+; RV64-BITS-512-LABEL: reverse_nxv8i1:
+; RV64-BITS-512: # %bb.0:
+; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, m1, ta, mu
+; RV64-BITS-512-NEXT: vmv.v.i v8, 0
+; RV64-BITS-512-NEXT: vmerge.vim v8, v8, 1, v0
+; RV64-BITS-512-NEXT: csrr a0, vlenb
+; RV64-BITS-512-NEXT: addi a0, a0, -1
+; RV64-BITS-512-NEXT: vid.v v9
+; RV64-BITS-512-NEXT: vrsub.vx v9, v9, a0
+; RV64-BITS-512-NEXT: vrgather.vv v10, v8, v9
+; RV64-BITS-512-NEXT: vand.vi v8, v10, 1
+; RV64-BITS-512-NEXT: vmsne.vi v0, v8, 0
+; RV64-BITS-512-NEXT: ret
+ %res = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> %a)
+ ret <vscale x 8 x i1> %res
+}
+
+define <vscale x 16 x i1> @reverse_nxv16i1(<vscale x 16 x i1> %a) {
+; RV32-BITS-UNKNOWN-LABEL: reverse_nxv16i1:
+; RV32-BITS-UNKNOWN: # %bb.0:
+; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m2, ta, mu
+; RV32-BITS-UNKNOWN-NEXT: vmv.v.i v8, 0
+; RV32-BITS-UNKNOWN-NEXT: vmerge.vim v8, v8, 1, v0
+; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb
+; RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 1
+; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, -1
+; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, m4, ta, mu
+; RV32-BITS-UNKNOWN-NEXT: vid.v v12
+; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v12, v12, a0
+; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, m2, ta, mu
+; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v10, v8, v12
+; RV32-BITS-UNKNOWN-NEXT: vand.vi v8, v10, 1
+; RV32-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0
+; RV32-BITS-UNKNOWN-NEXT: ret
+;
+; RV32-BITS-256-LABEL: reverse_nxv16i1:
+; RV32-BITS-256: # %bb.0:
+; RV32-BITS-256-NEXT: vsetvli a0, zero, e8, m2, ta, mu
+; RV32-BITS-256-NEXT: vmv.v.i v8, 0
+; RV32-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0
+; RV32-BITS-256-NEXT: csrr a0, vlenb
+; RV32-BITS-256-NEXT: slli a0, a0, 1
+; RV32-BITS-256-NEXT: addi a0, a0, -1
+; RV32-BITS-256-NEXT: vid.v v10
+; RV32-BITS-256-NEXT: vrsub.vx v10, v10, a0
+; RV32-BITS-256-NEXT: vrgather.vv v12, v8, v10
+; RV32-BITS-256-NEXT: vand.vi v8, v12, 1
+; RV32-BITS-256-NEXT: vmsne.vi v0, v8, 0
+; RV32-BITS-256-NEXT: ret
+;
+; RV32-BITS-512-LABEL: reverse_nxv16i1:
+; RV32-BITS-512: # %bb.0:
+; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, m2, ta, mu
+; RV32-BITS-512-NEXT: vmv.v.i v8, 0
+; RV32-BITS-512-NEXT: vmerge.vim v8, v8, 1, v0
+; RV32-BITS-512-NEXT: csrr a0, vlenb
+; RV32-BITS-512-NEXT: slli a0, a0, 1
+; RV32-BITS-512-NEXT: addi a0, a0, -1
+; RV32-BITS-512-NEXT: vid.v v10
+; RV32-BITS-512-NEXT: vrsub.vx v10, v10, a0
+; RV32-BITS-512-NEXT: vrgather.vv v12, v8, v10
+; RV32-BITS-512-NEXT: vand.vi v8, v12, 1
+; RV32-BITS-512-NEXT: vmsne.vi v0, v8, 0
+; RV32-BITS-512-NEXT: ret
+;
+; RV64-BITS-UNKNOWN-LABEL: reverse_nxv16i1:
+; RV64-BITS-UNKNOWN: # %bb.0:
+; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m2, ta, mu
+; RV64-BITS-UNKNOWN-NEXT: vmv.v.i v8, 0
+; RV64-BITS-UNKNOWN-NEXT: vmerge.vim v8, v8, 1, v0
+; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb
+; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 1
+; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1
+; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, m4, ta, mu
+; RV64-BITS-UNKNOWN-NEXT: vid.v v12
+; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v12, v12, a0
+; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, m2, ta, mu
+; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v10, v8, v12
+; RV64-BITS-UNKNOWN-NEXT: vand.vi v8, v10, 1
+; RV64-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0
+; RV64-BITS-UNKNOWN-NEXT: ret
+;
+; RV64-BITS-256-LABEL: reverse_nxv16i1:
+; RV64-BITS-256: # %bb.0:
+; RV64-BITS-256-NEXT: vsetvli a0, zero, e8, m2, ta, mu
+; RV64-BITS-256-NEXT: vmv.v.i v8, 0
+; RV64-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0
+; RV64-BITS-256-NEXT: csrr a0, vlenb
+; RV64-BITS-256-NEXT: slli a0, a0, 1
+; RV64-BITS-256-NEXT: addi a0, a0, -1
+; RV64-BITS-256-NEXT: vid.v v10
+; RV64-BITS-256-NEXT: vrsub.vx v10, v10, a0
+; RV64-BITS-256-NEXT: vrgather.vv v12, v8, v10
+; RV64-BITS-256-NEXT: vand.vi v8, v12, 1
+; RV64-BITS-256-NEXT: vmsne.vi v0, v8, 0
+; RV64-BITS-256-NEXT: ret
+;
+; RV64-BITS-512-LABEL: reverse_nxv16i1:
+; RV64-BITS-512: # %bb.0:
+; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, m2, ta, mu
+; RV64-BITS-512-NEXT: vmv.v.i v8, 0
+; RV64-BITS-512-NEXT: vmerge.vim v8, v8, 1, v0
+; RV64-BITS-512-NEXT: csrr a0, vlenb
+; RV64-BITS-512-NEXT: slli a0, a0, 1
+; RV64-BITS-512-NEXT: addi a0, a0, -1
+; RV64-BITS-512-NEXT: vid.v v10
+; RV64-BITS-512-NEXT: vrsub.vx v10, v10, a0
+; RV64-BITS-512-NEXT: vrgather.vv v12, v8, v10
+; RV64-BITS-512-NEXT: vand.vi v8, v12, 1
+; RV64-BITS-512-NEXT: vmsne.vi v0, v8, 0
+; RV64-BITS-512-NEXT: ret
+ %res = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> %a)
+ ret <vscale x 16 x i1> %res
+}
+
+define <vscale x 32 x i1> @reverse_nxv32i1(<vscale x 32 x i1> %a) {
+; RV32-BITS-UNKNOWN-LABEL: reverse_nxv32i1:
+; RV32-BITS-UNKNOWN: # %bb.0:
+; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m4, ta, mu
+; RV32-BITS-UNKNOWN-NEXT: vmv.v.i v8, 0
+; RV32-BITS-UNKNOWN-NEXT: vmerge.vim v8, v8, 1, v0
+; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb
+; RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 2
+; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, -1
+; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, m8, ta, mu
+; RV32-BITS-UNKNOWN-NEXT: vid.v v16
+; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v16, v16, a0
+; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, m4, ta, mu
+; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v12, v8, v16
+; RV32-BITS-UNKNOWN-NEXT: vand.vi v8, v12, 1
+; RV32-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0
+; RV32-BITS-UNKNOWN-NEXT: ret
+;
+; RV32-BITS-256-LABEL: reverse_nxv32i1:
+; RV32-BITS-256: # %bb.0:
+; RV32-BITS-256-NEXT: vsetvli a0, zero, e8, m4, ta, mu
+; RV32-BITS-256-NEXT: vmv.v.i v8, 0
+; RV32-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0
+; RV32-BITS-256-NEXT: csrr a0, vlenb
+; RV32-BITS-256-NEXT: slli a0, a0, 2
+; RV32-BITS-256-NEXT: addi a0, a0, -1
+; RV32-BITS-256-NEXT: vid.v v12
+; RV32-BITS-256-NEXT: vrsub.vx v12, v12, a0
+; RV32-BITS-256-NEXT: vrgather.vv v16, v8, v12
+; RV32-BITS-256-NEXT: vand.vi v8, v16, 1
+; RV32-BITS-256-NEXT: vmsne.vi v0, v8, 0
+; RV32-BITS-256-NEXT: ret
+;
+; RV32-BITS-512-LABEL: reverse_nxv32i1:
+; RV32-BITS-512: # %bb.0:
+; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, m4, ta, mu
+; RV32-BITS-512-NEXT: vmv.v.i v8, 0
+; RV32-BITS-512-NEXT: vmerge.vim v8, v8, 1, v0
+; RV32-BITS-512-NEXT: csrr a0, vlenb
+; RV32-BITS-512-NEXT: slli a0, a0, 2
+; RV32-BITS-512-NEXT: addi a0, a0, -1
+; RV32-BITS-512-NEXT: vid.v v12
+; RV32-BITS-512-NEXT: vrsub.vx v12, v12, a0
+; RV32-BITS-512-NEXT: vrgather.vv v16, v8, v12
+; RV32-BITS-512-NEXT: vand.vi v8, v16, 1
+; RV32-BITS-512-NEXT: vmsne.vi v0, v8, 0
+; RV32-BITS-512-NEXT: ret
+;
+; RV64-BITS-UNKNOWN-LABEL: reverse_nxv32i1:
+; RV64-BITS-UNKNOWN: # %bb.0:
+; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m4, ta, mu
+; RV64-BITS-UNKNOWN-NEXT: vmv.v.i v8, 0
+; RV64-BITS-UNKNOWN-NEXT: vmerge.vim v8, v8, 1, v0
+; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb
+; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 2
+; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1
+; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, m8, ta, mu
+; RV64-BITS-UNKNOWN-NEXT: vid.v v16
+; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v16, v16, a0
+; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, m4, ta, mu
+; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v12, v8, v16
+; RV64-BITS-UNKNOWN-NEXT: vand.vi v8, v12, 1
+; RV64-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0
+; RV64-BITS-UNKNOWN-NEXT: ret
+;
+; RV64-BITS-256-LABEL: reverse_nxv32i1:
+; RV64-BITS-256: # %bb.0:
+; RV64-BITS-256-NEXT: vsetvli a0, zero, e8, m4, ta, mu
+; RV64-BITS-256-NEXT: vmv.v.i v8, 0
+; RV64-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0
+; RV64-BITS-256-NEXT: csrr a0, vlenb
+; RV64-BITS-256-NEXT: slli a0, a0, 2
+; RV64-BITS-256-NEXT: addi a0, a0, -1
+; RV64-BITS-256-NEXT: vid.v v12
+; RV64-BITS-256-NEXT: vrsub.vx v12, v12, a0
+; RV64-BITS-256-NEXT: vrgather.vv v16, v8, v12
+; RV64-BITS-256-NEXT: vand.vi v8, v16, 1
+; RV64-BITS-256-NEXT: vmsne.vi v0, v8, 0
+; RV64-BITS-256-NEXT: ret
+;
+; RV64-BITS-512-LABEL: reverse_nxv32i1:
+; RV64-BITS-512: # %bb.0:
+; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, m4, ta, mu
+; RV64-BITS-512-NEXT: vmv.v.i v8, 0
+; RV64-BITS-512-NEXT: vmerge.vim v8, v8, 1, v0
+; RV64-BITS-512-NEXT: csrr a0, vlenb
+; RV64-BITS-512-NEXT: slli a0, a0, 2
+; RV64-BITS-512-NEXT: addi a0, a0, -1
+; RV64-BITS-512-NEXT: vid.v v12
+; RV64-BITS-512-NEXT: vrsub.vx v12, v12, a0
+; RV64-BITS-512-NEXT: vrgather.vv v16, v8, v12
+; RV64-BITS-512-NEXT: vand.vi v8, v16, 1
+; RV64-BITS-512-NEXT: vmsne.vi v0, v8, 0
+; RV64-BITS-512-NEXT: ret
+ %res = call <vscale x 32 x i1> @llvm.experimental.vector.reverse.nxv32i1(<vscale x 32 x i1> %a)
+ ret <vscale x 32 x i1> %res
+}
+define <vscale x 64 x i1> @reverse_nxv64i1(<vscale x 64 x i1> %a) {
+; RV32-BITS-UNKNOWN-LABEL: reverse_nxv64i1:
+; RV32-BITS-UNKNOWN: # %bb.0:
+; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb
+; RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 2
+; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, -1
+; RV32-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m8, ta, mu
+; RV32-BITS-UNKNOWN-NEXT: vid.v v8
+; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v8, v8, a0
+; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m8, ta, mu
+; RV32-BITS-UNKNOWN-NEXT: vmv.v.i v16, 0
+; RV32-BITS-UNKNOWN-NEXT: vmerge.vim v16, v16, 1, v0
+; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m4, ta, mu
+; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v28, v16, v8
+; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v24, v20, v8
+; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m8, ta, mu
+; RV32-BITS-UNKNOWN-NEXT: vand.vi v8, v24, 1
+; RV32-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0
+; RV32-BITS-UNKNOWN-NEXT: ret
+;
+; RV32-BITS-256-LABEL: reverse_nxv64i1:
+; RV32-BITS-256: # %bb.0:
+; RV32-BITS-256-NEXT: vsetvli a0, zero, e8, m8, ta, mu
+; RV32-BITS-256-NEXT: vmv.v.i v8, 0
+; RV32-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0
+; RV32-BITS-256-NEXT: csrr a0, vlenb
+; RV32-BITS-256-NEXT: slli a0, a0, 3
+; RV32-BITS-256-NEXT: addi a0, a0, -1
+; RV32-BITS-256-NEXT: vid.v v16
+; RV32-BITS-256-NEXT: vrsub.vx v16, v16, a0
+; RV32-BITS-256-NEXT: vrgather.vv v24, v8, v16
+; RV32-BITS-256-NEXT: vand.vi v8, v24, 1
+; RV32-BITS-256-NEXT: vmsne.vi v0, v8, 0
+; RV32-BITS-256-NEXT: ret
+;
+; RV32-BITS-512-LABEL: reverse_nxv64i1:
+; RV32-BITS-512: # %bb.0:
+; RV32-BITS-512-NEXT: csrr a0, vlenb
+; RV32-BITS-512-NEXT: slli a0, a0, 2
+; RV32-BITS-512-NEXT: addi a0, a0, -1
+; RV32-BITS-512-NEXT: vsetvli a1, zero, e8, m4, ta, mu
+; RV32-BITS-512-NEXT: vid.v v8
+; RV32-BITS-512-NEXT: vrsub.vx v8, v8, a0
+; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, m8, ta, mu
+; RV32-BITS-512-NEXT: vmv.v.i v16, 0
+; RV32-BITS-512-NEXT: vmerge.vim v16, v16, 1, v0
+; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, m4, ta, mu
+; RV32-BITS-512-NEXT: vrgather.vv v28, v16, v8
+; RV32-BITS-512-NEXT: vrgather.vv v24, v20, v8
+; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, m8, ta, mu
+; RV32-BITS-512-NEXT: vand.vi v8, v24, 1
+; RV32-BITS-512-NEXT: vmsne.vi v0, v8, 0
+; RV32-BITS-512-NEXT: ret
+;
+; RV64-BITS-UNKNOWN-LABEL: reverse_nxv64i1:
+; RV64-BITS-UNKNOWN: # %bb.0:
+; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb
+; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 2
+; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1
+; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m8, ta, mu
+; RV64-BITS-UNKNOWN-NEXT: vid.v v8
+; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v8, v8, a0
+; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m8, ta, mu
+; RV64-BITS-UNKNOWN-NEXT: vmv.v.i v16, 0
+; RV64-BITS-UNKNOWN-NEXT: vmerge.vim v16, v16, 1, v0
+; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m4, ta, mu
+; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v28, v16, v8
+; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v24, v20, v8
+; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m8, ta, mu
+; RV64-BITS-UNKNOWN-NEXT: vand.vi v8, v24, 1
+; RV64-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0
+; RV64-BITS-UNKNOWN-NEXT: ret
+;
+; RV64-BITS-256-LABEL: reverse_nxv64i1:
+; RV64-BITS-256: # %bb.0:
+; RV64-BITS-256-NEXT: vsetvli a0, zero, e8, m8, ta, mu
+; RV64-BITS-256-NEXT: vmv.v.i v8, 0
+; RV64-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0
+; RV64-BITS-256-NEXT: csrr a0, vlenb
+; RV64-BITS-256-NEXT: slli a0, a0, 3
+; RV64-BITS-256-NEXT: addi a0, a0, -1
+; RV64-BITS-256-NEXT: vid.v v16
+; RV64-BITS-256-NEXT: vrsub.vx v16, v16, a0
+; RV64-BITS-256-NEXT: vrgather.vv v24, v8, v16
+; RV64-BITS-256-NEXT: vand.vi v8, v24, 1
+; RV64-BITS-256-NEXT: vmsne.vi v0, v8, 0
+; RV64-BITS-256-NEXT: ret
+;
+; RV64-BITS-512-LABEL: reverse_nxv64i1:
+; RV64-BITS-512: # %bb.0:
+; RV64-BITS-512-NEXT: csrr a0, vlenb
+; RV64-BITS-512-NEXT: slli a0, a0, 2
+; RV64-BITS-512-NEXT: addi a0, a0, -1
+; RV64-BITS-512-NEXT: vsetvli a1, zero, e8, m4, ta, mu
+; RV64-BITS-512-NEXT: vid.v v8
+; RV64-BITS-512-NEXT: vrsub.vx v8, v8, a0
+; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, m8, ta, mu
+; RV64-BITS-512-NEXT: vmv.v.i v16, 0
+; RV64-BITS-512-NEXT: vmerge.vim v16, v16, 1, v0
+; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, m4, ta, mu
+; RV64-BITS-512-NEXT: vrgather.vv v28, v16, v8
+; RV64-BITS-512-NEXT: vrgather.vv v24, v20, v8
+; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, m8, ta, mu
+; RV64-BITS-512-NEXT: vand.vi v8, v24, 1
+; RV64-BITS-512-NEXT: vmsne.vi v0, v8, 0
+; RV64-BITS-512-NEXT: ret
+ %res = call <vscale x 64 x i1> @llvm.experimental.vector.reverse.nxv64i1(<vscale x 64 x i1> %a)
+ ret <vscale x 64 x i1> %res
+}
;
; VECTOR_REVERSE - integer
More information about the llvm-commits mailing list