[llvm] 1276678 - [RISCV] Improve extract_vector_elt for fixed mask registers.
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 28 19:08:23 PST 2022
Author: jacquesguan
Date: 2022-01-29T11:07:53+08:00
New Revision: 1276678982a0c9d0d7aba6afdc83d34ee42f397c
URL: https://github.com/llvm/llvm-project/commit/1276678982a0c9d0d7aba6afdc83d34ee42f397c
DIFF: https://github.com/llvm/llvm-project/commit/1276678982a0c9d0d7aba6afdc83d34ee42f397c.diff
LOG: [RISCV] Improve extract_vector_elt for fixed mask registers.
Currently the backend promotes the mask vector to an i8 vector and extracts the element from that, which is probably not optimal. Instead, we can bitcast the mask to a vector with wider elements, extract the containing element into a GPR, and then use scalar instructions (a shift plus an and, or bext with Zbs) to extract the desired bit.
Differential Revision: https://reviews.llvm.org/D117389
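
For readers skimming the patch, the index arithmetic the new lowering emits can be stated in scalar terms. The following is only an illustrative sketch of that arithmetic, not code from the patch; the helper name extractMaskBit, the Words array, and the fixed 32-bit word width are assumptions for the example.

    #include <cstdint>
    #include <cstddef>

    // Model of the mask after it has been bitcast to a vector of 32-bit
    // elements: Words[i] holds mask bits [32*i, 32*i + 31].
    constexpr unsigned WordBits = 32;

    uint32_t extractMaskBit(const uint32_t *Words, size_t Idx) {
      size_t ElemIdx = Idx >> 5;            // Idx / WordBits: which element to read
      size_t BitIdx  = Idx & (WordBits - 1);// Idx % WordBits: bit position inside it
      uint32_t Word  = Words[ElemIdx];      // corresponds to vslidedown.vx + vmv.x.s
      return (Word >> BitIdx) & 1u;         // corresponds to srl + andi (or bext with Zbs)
    }

When the whole mask fits in a single element (NumElts <= XLEN/ELEN), the element index is constant zero and only the shift-and-mask step is needed, which matches the v8i1..v32i1 test updates below.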
Added:
Modified:
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index f63b59fa271dc..8d5d69420d5f4 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -4291,8 +4291,47 @@ SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
MVT XLenVT = Subtarget.getXLenVT();
if (VecVT.getVectorElementType() == MVT::i1) {
- // FIXME: For now we just promote to an i8 vector and extract from that,
- // but this is probably not optimal.
+ if (VecVT.isFixedLengthVector()) {
+ unsigned NumElts = VecVT.getVectorNumElements();
+ if (NumElts >= 8) {
+ MVT WideEltVT;
+ unsigned WidenVecLen;
+ SDValue ExtractElementIdx;
+ SDValue ExtractBitIdx;
+ unsigned MaxEEW = Subtarget.getMaxELENForFixedLengthVectors();
+ MVT LargestEltVT = MVT::getIntegerVT(
+ std::min(MaxEEW, unsigned(XLenVT.getSizeInBits())));
+ if (NumElts <= LargestEltVT.getSizeInBits()) {
+ assert(isPowerOf2_32(NumElts) &&
+ "the number of elements should be power of 2");
+ WideEltVT = MVT::getIntegerVT(NumElts);
+ WidenVecLen = 1;
+ ExtractElementIdx = DAG.getConstant(0, DL, XLenVT);
+ ExtractBitIdx = Idx;
+ } else {
+ WideEltVT = LargestEltVT;
+ WidenVecLen = NumElts / WideEltVT.getSizeInBits();
+ // extract element index = index / element width
+ ExtractElementIdx = DAG.getNode(
+ ISD::SRL, DL, XLenVT, Idx,
+ DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()), DL, XLenVT));
+ // mask bit index = index % element width
+ ExtractBitIdx = DAG.getNode(
+ ISD::AND, DL, XLenVT, Idx,
+ DAG.getConstant(WideEltVT.getSizeInBits() - 1, DL, XLenVT));
+ }
+ MVT WideVT = MVT::getVectorVT(WideEltVT, WidenVecLen);
+ Vec = DAG.getNode(ISD::BITCAST, DL, WideVT, Vec);
+ SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT,
+ Vec, ExtractElementIdx);
+ // Extract the bit from GPR.
+ SDValue ShiftRight =
+ DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx);
+ return DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
+ DAG.getConstant(1, DL, XLenVT));
+ }
+ }
+ // Otherwise, promote to an i8 vector and extract from that.
MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll
index e2236b3249d1f..80a8d8a637e0e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll
@@ -2,6 +2,9 @@
; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+f,+d,+zbs -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32ZBS
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+f,+d,+zbs -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64ZBS
+
define i1 @extractelt_v1i1(<1 x i8>* %x, i64 %idx) nounwind {
; CHECK-LABEL: extractelt_v1i1:
; CHECK: # %bb.0:
@@ -56,17 +59,43 @@ define i1 @extractelt_v4i1(<4 x i8>* %x, i64 %idx) nounwind {
}
define i1 @extractelt_v8i1(<8 x i8>* %x, i64 %idx) nounwind {
-; CHECK-LABEL: extractelt_v8i1:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vmseq.vi v0, v8, 0
-; CHECK-NEXT: vmv.v.i v8, 0
-; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, mu
-; CHECK-NEXT: vslidedown.vx v8, v8, a1
-; CHECK-NEXT: vmv.x.s a0, v8
-; CHECK-NEXT: ret
+; RV32-LABEL: extractelt_v8i1:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; RV32-NEXT: vle8.v v8, (a0)
+; RV32-NEXT: vmseq.vi v8, v8, 0
+; RV32-NEXT: vmv.x.s a0, v8
+; RV32-NEXT: srl a0, a0, a1
+; RV32-NEXT: andi a0, a0, 1
+; RV32-NEXT: ret
+;
+; RV64-LABEL: extractelt_v8i1:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; RV64-NEXT: vle8.v v8, (a0)
+; RV64-NEXT: vmseq.vi v8, v8, 0
+; RV64-NEXT: vmv.x.s a0, v8
+; RV64-NEXT: srl a0, a0, a1
+; RV64-NEXT: andi a0, a0, 1
+; RV64-NEXT: ret
+;
+; RV32ZBS-LABEL: extractelt_v8i1:
+; RV32ZBS: # %bb.0:
+; RV32ZBS-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; RV32ZBS-NEXT: vle8.v v8, (a0)
+; RV32ZBS-NEXT: vmseq.vi v8, v8, 0
+; RV32ZBS-NEXT: vmv.x.s a0, v8
+; RV32ZBS-NEXT: bext a0, a0, a1
+; RV32ZBS-NEXT: ret
+;
+; RV64ZBS-LABEL: extractelt_v8i1:
+; RV64ZBS: # %bb.0:
+; RV64ZBS-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; RV64ZBS-NEXT: vle8.v v8, (a0)
+; RV64ZBS-NEXT: vmseq.vi v8, v8, 0
+; RV64ZBS-NEXT: vmv.x.s a0, v8
+; RV64ZBS-NEXT: bext a0, a0, a1
+; RV64ZBS-NEXT: ret
%a = load <8 x i8>, <8 x i8>* %x
%b = icmp eq <8 x i8> %a, zeroinitializer
%c = extractelement <8 x i1> %b, i64 %idx
@@ -74,17 +103,47 @@ define i1 @extractelt_v8i1(<8 x i8>* %x, i64 %idx) nounwind {
}
define i1 @extractelt_v16i1(<16 x i8>* %x, i64 %idx) nounwind {
-; CHECK-LABEL: extractelt_v16i1:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu
-; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vmseq.vi v0, v8, 0
-; CHECK-NEXT: vmv.v.i v8, 0
-; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, mu
-; CHECK-NEXT: vslidedown.vx v8, v8, a1
-; CHECK-NEXT: vmv.x.s a0, v8
-; CHECK-NEXT: ret
+; RV32-LABEL: extractelt_v16i1:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, mu
+; RV32-NEXT: vle8.v v8, (a0)
+; RV32-NEXT: vmseq.vi v8, v8, 0
+; RV32-NEXT: vsetivli zero, 0, e16, mf4, ta, mu
+; RV32-NEXT: vmv.x.s a0, v8
+; RV32-NEXT: srl a0, a0, a1
+; RV32-NEXT: andi a0, a0, 1
+; RV32-NEXT: ret
+;
+; RV64-LABEL: extractelt_v16i1:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu
+; RV64-NEXT: vle8.v v8, (a0)
+; RV64-NEXT: vmseq.vi v8, v8, 0
+; RV64-NEXT: vsetivli zero, 0, e16, mf4, ta, mu
+; RV64-NEXT: vmv.x.s a0, v8
+; RV64-NEXT: srl a0, a0, a1
+; RV64-NEXT: andi a0, a0, 1
+; RV64-NEXT: ret
+;
+; RV32ZBS-LABEL: extractelt_v16i1:
+; RV32ZBS: # %bb.0:
+; RV32ZBS-NEXT: vsetivli zero, 16, e8, m1, ta, mu
+; RV32ZBS-NEXT: vle8.v v8, (a0)
+; RV32ZBS-NEXT: vmseq.vi v8, v8, 0
+; RV32ZBS-NEXT: vsetivli zero, 0, e16, mf4, ta, mu
+; RV32ZBS-NEXT: vmv.x.s a0, v8
+; RV32ZBS-NEXT: bext a0, a0, a1
+; RV32ZBS-NEXT: ret
+;
+; RV64ZBS-LABEL: extractelt_v16i1:
+; RV64ZBS: # %bb.0:
+; RV64ZBS-NEXT: vsetivli zero, 16, e8, m1, ta, mu
+; RV64ZBS-NEXT: vle8.v v8, (a0)
+; RV64ZBS-NEXT: vmseq.vi v8, v8, 0
+; RV64ZBS-NEXT: vsetivli zero, 0, e16, mf4, ta, mu
+; RV64ZBS-NEXT: vmv.x.s a0, v8
+; RV64ZBS-NEXT: bext a0, a0, a1
+; RV64ZBS-NEXT: ret
%a = load <16 x i8>, <16 x i8>* %x
%b = icmp eq <16 x i8> %a, zeroinitializer
%c = extractelement <16 x i1> %b, i64 %idx
@@ -92,18 +151,51 @@ define i1 @extractelt_v16i1(<16 x i8>* %x, i64 %idx) nounwind {
}
define i1 @extractelt_v32i1(<32 x i8>* %x, i64 %idx) nounwind {
-; CHECK-LABEL: extractelt_v32i1:
-; CHECK: # %bb.0:
-; CHECK-NEXT: li a2, 32
-; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, mu
-; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vmseq.vi v0, v8, 0
-; CHECK-NEXT: vmv.v.i v8, 0
-; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT: vsetivli zero, 1, e8, m2, ta, mu
-; CHECK-NEXT: vslidedown.vx v8, v8, a1
-; CHECK-NEXT: vmv.x.s a0, v8
-; CHECK-NEXT: ret
+; RV32-LABEL: extractelt_v32i1:
+; RV32: # %bb.0:
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: vsetvli zero, a2, e8, m2, ta, mu
+; RV32-NEXT: vle8.v v8, (a0)
+; RV32-NEXT: vmseq.vi v10, v8, 0
+; RV32-NEXT: vsetivli zero, 0, e32, mf2, ta, mu
+; RV32-NEXT: vmv.x.s a0, v10
+; RV32-NEXT: srl a0, a0, a1
+; RV32-NEXT: andi a0, a0, 1
+; RV32-NEXT: ret
+;
+; RV64-LABEL: extractelt_v32i1:
+; RV64: # %bb.0:
+; RV64-NEXT: li a2, 32
+; RV64-NEXT: vsetvli zero, a2, e8, m2, ta, mu
+; RV64-NEXT: vle8.v v8, (a0)
+; RV64-NEXT: vmseq.vi v10, v8, 0
+; RV64-NEXT: vsetivli zero, 0, e32, mf2, ta, mu
+; RV64-NEXT: vmv.x.s a0, v10
+; RV64-NEXT: srl a0, a0, a1
+; RV64-NEXT: andi a0, a0, 1
+; RV64-NEXT: ret
+;
+; RV32ZBS-LABEL: extractelt_v32i1:
+; RV32ZBS: # %bb.0:
+; RV32ZBS-NEXT: li a2, 32
+; RV32ZBS-NEXT: vsetvli zero, a2, e8, m2, ta, mu
+; RV32ZBS-NEXT: vle8.v v8, (a0)
+; RV32ZBS-NEXT: vmseq.vi v10, v8, 0
+; RV32ZBS-NEXT: vsetivli zero, 0, e32, mf2, ta, mu
+; RV32ZBS-NEXT: vmv.x.s a0, v10
+; RV32ZBS-NEXT: bext a0, a0, a1
+; RV32ZBS-NEXT: ret
+;
+; RV64ZBS-LABEL: extractelt_v32i1:
+; RV64ZBS: # %bb.0:
+; RV64ZBS-NEXT: li a2, 32
+; RV64ZBS-NEXT: vsetvli zero, a2, e8, m2, ta, mu
+; RV64ZBS-NEXT: vle8.v v8, (a0)
+; RV64ZBS-NEXT: vmseq.vi v10, v8, 0
+; RV64ZBS-NEXT: vsetivli zero, 0, e32, mf2, ta, mu
+; RV64ZBS-NEXT: vmv.x.s a0, v10
+; RV64ZBS-NEXT: bext a0, a0, a1
+; RV64ZBS-NEXT: ret
%a = load <32 x i8>, <32 x i8>* %x
%b = icmp eq <32 x i8> %a, zeroinitializer
%c = extractelement <32 x i1> %b, i64 %idx
@@ -111,18 +203,55 @@ define i1 @extractelt_v32i1(<32 x i8>* %x, i64 %idx) nounwind {
}
define i1 @extractelt_v64i1(<64 x i8>* %x, i64 %idx) nounwind {
-; CHECK-LABEL: extractelt_v64i1:
-; CHECK: # %bb.0:
-; CHECK-NEXT: li a2, 64
-; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, mu
-; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vmseq.vi v0, v8, 0
-; CHECK-NEXT: vmv.v.i v8, 0
-; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT: vsetivli zero, 1, e8, m4, ta, mu
-; CHECK-NEXT: vslidedown.vx v8, v8, a1
-; CHECK-NEXT: vmv.x.s a0, v8
-; CHECK-NEXT: ret
+; RV32-LABEL: extractelt_v64i1:
+; RV32: # %bb.0:
+; RV32-NEXT: li a2, 64
+; RV32-NEXT: vsetvli zero, a2, e8, m4, ta, mu
+; RV32-NEXT: vle8.v v8, (a0)
+; RV32-NEXT: vmseq.vi v12, v8, 0
+; RV32-NEXT: srli a0, a1, 5
+; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, mu
+; RV32-NEXT: vslidedown.vx v8, v12, a0
+; RV32-NEXT: vmv.x.s a0, v8
+; RV32-NEXT: srl a0, a0, a1
+; RV32-NEXT: andi a0, a0, 1
+; RV32-NEXT: ret
+;
+; RV64-LABEL: extractelt_v64i1:
+; RV64: # %bb.0:
+; RV64-NEXT: li a2, 64
+; RV64-NEXT: vsetvli zero, a2, e8, m4, ta, mu
+; RV64-NEXT: vle8.v v8, (a0)
+; RV64-NEXT: vmseq.vi v12, v8, 0
+; RV64-NEXT: vsetivli zero, 0, e64, m1, ta, mu
+; RV64-NEXT: vmv.x.s a0, v12
+; RV64-NEXT: srl a0, a0, a1
+; RV64-NEXT: andi a0, a0, 1
+; RV64-NEXT: ret
+;
+; RV32ZBS-LABEL: extractelt_v64i1:
+; RV32ZBS: # %bb.0:
+; RV32ZBS-NEXT: li a2, 64
+; RV32ZBS-NEXT: vsetvli zero, a2, e8, m4, ta, mu
+; RV32ZBS-NEXT: vle8.v v8, (a0)
+; RV32ZBS-NEXT: vmseq.vi v12, v8, 0
+; RV32ZBS-NEXT: srli a0, a1, 5
+; RV32ZBS-NEXT: vsetivli zero, 1, e32, mf2, ta, mu
+; RV32ZBS-NEXT: vslidedown.vx v8, v12, a0
+; RV32ZBS-NEXT: vmv.x.s a0, v8
+; RV32ZBS-NEXT: bext a0, a0, a1
+; RV32ZBS-NEXT: ret
+;
+; RV64ZBS-LABEL: extractelt_v64i1:
+; RV64ZBS: # %bb.0:
+; RV64ZBS-NEXT: li a2, 64
+; RV64ZBS-NEXT: vsetvli zero, a2, e8, m4, ta, mu
+; RV64ZBS-NEXT: vle8.v v8, (a0)
+; RV64ZBS-NEXT: vmseq.vi v12, v8, 0
+; RV64ZBS-NEXT: vsetivli zero, 0, e64, m1, ta, mu
+; RV64ZBS-NEXT: vmv.x.s a0, v12
+; RV64ZBS-NEXT: bext a0, a0, a1
+; RV64ZBS-NEXT: ret
%a = load <64 x i8>, <64 x i8>* %x
%b = icmp eq <64 x i8> %a, zeroinitializer
%c = extractelement <64 x i1> %b, i64 %idx
@@ -130,18 +259,59 @@ define i1 @extractelt_v64i1(<64 x i8>* %x, i64 %idx) nounwind {
}
define i1 @extractelt_v128i1(<128 x i8>* %x, i64 %idx) nounwind {
-; CHECK-LABEL: extractelt_v128i1:
-; CHECK: # %bb.0:
-; CHECK-NEXT: li a2, 128
-; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, mu
-; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vmseq.vi v0, v8, 0
-; CHECK-NEXT: vmv.v.i v8, 0
-; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, mu
-; CHECK-NEXT: vslidedown.vx v8, v8, a1
-; CHECK-NEXT: vmv.x.s a0, v8
-; CHECK-NEXT: ret
+; RV32-LABEL: extractelt_v128i1:
+; RV32: # %bb.0:
+; RV32-NEXT: li a2, 128
+; RV32-NEXT: vsetvli zero, a2, e8, m8, ta, mu
+; RV32-NEXT: vle8.v v8, (a0)
+; RV32-NEXT: vmseq.vi v16, v8, 0
+; RV32-NEXT: srli a0, a1, 5
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, mu
+; RV32-NEXT: vslidedown.vx v8, v16, a0
+; RV32-NEXT: vmv.x.s a0, v8
+; RV32-NEXT: srl a0, a0, a1
+; RV32-NEXT: andi a0, a0, 1
+; RV32-NEXT: ret
+;
+; RV64-LABEL: extractelt_v128i1:
+; RV64: # %bb.0:
+; RV64-NEXT: li a2, 128
+; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, mu
+; RV64-NEXT: vle8.v v8, (a0)
+; RV64-NEXT: vmseq.vi v16, v8, 0
+; RV64-NEXT: srli a0, a1, 6
+; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, mu
+; RV64-NEXT: vslidedown.vx v8, v16, a0
+; RV64-NEXT: vmv.x.s a0, v8
+; RV64-NEXT: srl a0, a0, a1
+; RV64-NEXT: andi a0, a0, 1
+; RV64-NEXT: ret
+;
+; RV32ZBS-LABEL: extractelt_v128i1:
+; RV32ZBS: # %bb.0:
+; RV32ZBS-NEXT: li a2, 128
+; RV32ZBS-NEXT: vsetvli zero, a2, e8, m8, ta, mu
+; RV32ZBS-NEXT: vle8.v v8, (a0)
+; RV32ZBS-NEXT: vmseq.vi v16, v8, 0
+; RV32ZBS-NEXT: srli a0, a1, 5
+; RV32ZBS-NEXT: vsetivli zero, 1, e32, m1, ta, mu
+; RV32ZBS-NEXT: vslidedown.vx v8, v16, a0
+; RV32ZBS-NEXT: vmv.x.s a0, v8
+; RV32ZBS-NEXT: bext a0, a0, a1
+; RV32ZBS-NEXT: ret
+;
+; RV64ZBS-LABEL: extractelt_v128i1:
+; RV64ZBS: # %bb.0:
+; RV64ZBS-NEXT: li a2, 128
+; RV64ZBS-NEXT: vsetvli zero, a2, e8, m8, ta, mu
+; RV64ZBS-NEXT: vle8.v v8, (a0)
+; RV64ZBS-NEXT: vmseq.vi v16, v8, 0
+; RV64ZBS-NEXT: srli a0, a1, 6
+; RV64ZBS-NEXT: vsetivli zero, 1, e64, m1, ta, mu
+; RV64ZBS-NEXT: vslidedown.vx v8, v16, a0
+; RV64ZBS-NEXT: vmv.x.s a0, v8
+; RV64ZBS-NEXT: bext a0, a0, a1
+; RV64ZBS-NEXT: ret
%a = load <128 x i8>, <128 x i8>* %x
%b = icmp eq <128 x i8> %a, zeroinitializer
%c = extractelement <128 x i1> %b, i64 %idx
@@ -212,6 +382,70 @@ define i1 @extractelt_v256i1(<256 x i8>* %x, i64 %idx) nounwind {
; RV64-NEXT: ld s0, 496(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 512
; RV64-NEXT: ret
+;
+; RV32ZBS-LABEL: extractelt_v256i1:
+; RV32ZBS: # %bb.0:
+; RV32ZBS-NEXT: addi sp, sp, -512
+; RV32ZBS-NEXT: sw ra, 508(sp) # 4-byte Folded Spill
+; RV32ZBS-NEXT: sw s0, 504(sp) # 4-byte Folded Spill
+; RV32ZBS-NEXT: addi s0, sp, 512
+; RV32ZBS-NEXT: andi sp, sp, -128
+; RV32ZBS-NEXT: andi a1, a1, 255
+; RV32ZBS-NEXT: addi a2, a0, 128
+; RV32ZBS-NEXT: li a3, 128
+; RV32ZBS-NEXT: vsetvli zero, a3, e8, m8, ta, mu
+; RV32ZBS-NEXT: vle8.v v16, (a0)
+; RV32ZBS-NEXT: vle8.v v24, (a2)
+; RV32ZBS-NEXT: addi a0, sp, 128
+; RV32ZBS-NEXT: add a0, a0, a1
+; RV32ZBS-NEXT: vmseq.vi v8, v16, 0
+; RV32ZBS-NEXT: vmseq.vi v0, v24, 0
+; RV32ZBS-NEXT: vmv.v.i v16, 0
+; RV32ZBS-NEXT: vmerge.vim v24, v16, 1, v0
+; RV32ZBS-NEXT: addi a1, sp, 256
+; RV32ZBS-NEXT: vse8.v v24, (a1)
+; RV32ZBS-NEXT: vmv1r.v v0, v8
+; RV32ZBS-NEXT: vmerge.vim v8, v16, 1, v0
+; RV32ZBS-NEXT: addi a1, sp, 128
+; RV32ZBS-NEXT: vse8.v v8, (a1)
+; RV32ZBS-NEXT: lb a0, 0(a0)
+; RV32ZBS-NEXT: addi sp, s0, -512
+; RV32ZBS-NEXT: lw ra, 508(sp) # 4-byte Folded Reload
+; RV32ZBS-NEXT: lw s0, 504(sp) # 4-byte Folded Reload
+; RV32ZBS-NEXT: addi sp, sp, 512
+; RV32ZBS-NEXT: ret
+;
+; RV64ZBS-LABEL: extractelt_v256i1:
+; RV64ZBS: # %bb.0:
+; RV64ZBS-NEXT: addi sp, sp, -512
+; RV64ZBS-NEXT: sd ra, 504(sp) # 8-byte Folded Spill
+; RV64ZBS-NEXT: sd s0, 496(sp) # 8-byte Folded Spill
+; RV64ZBS-NEXT: addi s0, sp, 512
+; RV64ZBS-NEXT: andi sp, sp, -128
+; RV64ZBS-NEXT: andi a1, a1, 255
+; RV64ZBS-NEXT: addi a2, a0, 128
+; RV64ZBS-NEXT: li a3, 128
+; RV64ZBS-NEXT: vsetvli zero, a3, e8, m8, ta, mu
+; RV64ZBS-NEXT: vle8.v v16, (a0)
+; RV64ZBS-NEXT: vle8.v v24, (a2)
+; RV64ZBS-NEXT: addi a0, sp, 128
+; RV64ZBS-NEXT: add a0, a0, a1
+; RV64ZBS-NEXT: vmseq.vi v8, v16, 0
+; RV64ZBS-NEXT: vmseq.vi v0, v24, 0
+; RV64ZBS-NEXT: vmv.v.i v16, 0
+; RV64ZBS-NEXT: vmerge.vim v24, v16, 1, v0
+; RV64ZBS-NEXT: addi a1, sp, 256
+; RV64ZBS-NEXT: vse8.v v24, (a1)
+; RV64ZBS-NEXT: vmv1r.v v0, v8
+; RV64ZBS-NEXT: vmerge.vim v8, v16, 1, v0
+; RV64ZBS-NEXT: addi a1, sp, 128
+; RV64ZBS-NEXT: vse8.v v8, (a1)
+; RV64ZBS-NEXT: lb a0, 0(a0)
+; RV64ZBS-NEXT: addi sp, s0, -512
+; RV64ZBS-NEXT: ld ra, 504(sp) # 8-byte Folded Reload
+; RV64ZBS-NEXT: ld s0, 496(sp) # 8-byte Folded Reload
+; RV64ZBS-NEXT: addi sp, sp, 512
+; RV64ZBS-NEXT: ret
%a = load <256 x i8>, <256 x i8>* %x
%b = icmp eq <256 x i8> %a, zeroinitializer
%c = extractelement <256 x i1> %b, i64 %idx