[llvm] [RISCV] Add codegen support for ri.vinsert.v.x and ri.vextract.x.v (PR #136708)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 22 08:15:34 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-risc-v
Author: Philip Reames (preames)
Changes:
These instructions are part of the XRivosVisni extension. ri.vinsert.v.x inserts a scalar into a vector and ri.vextract.x.v extracts a scalar from a vector, each at a potentially non-zero index. They are closely analogous to vmv.s.x and vmv.x.s, respectively.
The instructions do have a couple of restrictions:
1) Only constant indices are supported, encoded as a uimm5 immediate.
2) There are no FP variants.
One important property of these instructions is that their throughput and latency are expected to be LMUL-independent.
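As a representative case (taken from the fixed-vectors-extract.ll test diff below), a constant-index extract that previously needed a vslidedown.vi followed by vmv.x.s now lowers to a single ri.vextract.x.v:

```llvm
define i32 @extractelt_v8i32(<8 x i32> %a) nounwind {
; VISNI-LABEL: extractelt_v8i32:
; VISNI:       # %bb.0:
; VISNI-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; VISNI-NEXT:    ri.vextract.x.v a0, v8, 6
; VISNI-NEXT:    ret
  %b = extractelement <8 x i32> %a, i32 6
  ret i32 %b
}
```

The insert side is analogous: a constant-index insertelement on an integer vector lowers to ri.vinsert.v.x, with the tail policy relaxed to agnostic when the inserted element is the last one in the fixed-length vector.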
---
Patch is 62.40 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/136708.diff
6 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+39-2)
- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.h (+10)
- (modified) llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp (+21-1)
- (modified) llvm/lib/Target/RISCV/RISCVInstrInfoXRivos.td (+55)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll (+447)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll (+364)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 98fba9e86e88a..30ef1b5d8d209 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -6935,7 +6935,7 @@ static bool hasPassthruOp(unsigned Opcode) {
Opcode <= RISCVISD::LAST_STRICTFP_OPCODE &&
"not a RISC-V target specific op");
static_assert(
- RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 132 &&
+ RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 133 &&
RISCVISD::LAST_STRICTFP_OPCODE - RISCVISD::FIRST_STRICTFP_OPCODE == 21 &&
"adding target specific op should update this function");
if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::VFMAX_VL)
@@ -6959,7 +6959,7 @@ static bool hasMaskOp(unsigned Opcode) {
Opcode <= RISCVISD::LAST_STRICTFP_OPCODE &&
"not a RISC-V target specific op");
static_assert(
- RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 132 &&
+ RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 133 &&
RISCVISD::LAST_STRICTFP_OPCODE - RISCVISD::FIRST_STRICTFP_OPCODE == 21 &&
"adding target specific op should update this function");
if (Opcode >= RISCVISD::TRUNCATE_VECTOR_VL && Opcode <= RISCVISD::SETCC_VL)
@@ -9567,6 +9567,13 @@ getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG,
return SmallerVT;
}
+static bool isValidInsertExtractIndex(SDValue Idx) {
+ auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
+ if (!IdxC || isNullConstant(Idx))
+ return false;
+ return IdxC->getZExtValue() < 32;
+}
+
// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
// first position of a vector, and that vector is slid up to the insert index.
// By limiting the active vector length to index+1 and merging with the
@@ -9677,6 +9684,26 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
return Vec;
return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
}
+
+ // Use ri.vinsert.v.x if available.
+ if (Subtarget.hasVendorXRivosVisni() && VecVT.isInteger() &&
+ isValidInsertExtractIndex(Idx)) {
+ unsigned Policy = RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED;
+ if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Idx) &&
+ Idx->getAsZExtVal() + 1 == VecVT.getVectorNumElements())
+ Policy = RISCVVType::TAIL_AGNOSTIC;
+ SDValue PolicyOp =
+ DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
+ Vec = DAG.getNode(RISCVISD::RI_VINSERT_VL, DL, ContainerVT, Vec, Val, Idx,
+ VL, PolicyOp);
+ if (AlignedIdx)
+ Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
+ Vec, AlignedIdx);
+ if (!VecVT.isFixedLengthVector())
+ return Vec;
+ return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
+ }
+
ValInVec = lowerScalarInsert(Val, VL, ContainerVT, DL, DAG, Subtarget);
} else {
// On RV32, i64-element vectors must be specially handled to place the
@@ -9876,6 +9903,14 @@ SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
}
}
+ // Use ri.vextract.x.v if available.
+ // TODO: Avoid index 0 and just use the vmv.x.s
+ if (Subtarget.hasVendorXRivosVisni() && EltVT.isInteger() &&
+ isValidInsertExtractIndex(Idx)) {
+ SDValue Elt = DAG.getNode(RISCVISD::RI_VEXTRACT, DL, XLenVT, Vec, Idx);
+ return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt);
+ }
+
// If after narrowing, the required slide is still greater than LMUL2,
// fallback to generic expansion and go through the stack. This is done
// for a subtle reason: extracting *all* elements out of a vector is
@@ -22253,11 +22288,13 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(VZEXT_VL)
NODE_NAME_CASE(VCPOP_VL)
NODE_NAME_CASE(VFIRST_VL)
+ NODE_NAME_CASE(RI_VINSERT_VL)
NODE_NAME_CASE(RI_VZIPEVEN_VL)
NODE_NAME_CASE(RI_VZIPODD_VL)
NODE_NAME_CASE(RI_VZIP2A_VL)
NODE_NAME_CASE(RI_VUNZIP2A_VL)
NODE_NAME_CASE(RI_VUNZIP2B_VL)
+ NODE_NAME_CASE(RI_VEXTRACT)
NODE_NAME_CASE(READ_CSR)
NODE_NAME_CASE(WRITE_CSR)
NODE_NAME_CASE(SWAP_CSR)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index baf1b2e4d8e6e..871b6b0c53775 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -404,6 +404,10 @@ enum NodeType : unsigned {
// vfirst.m with additional mask and VL operands.
VFIRST_VL,
+ // XRivosVisni
+ // VINSERT matches the semantics of ri.vinsert.v.x. It carries a VL operand.
+ RI_VINSERT_VL,
+
// XRivosVizip
RI_VZIPEVEN_VL,
RI_VZIPODD_VL,
@@ -413,6 +417,12 @@ enum NodeType : unsigned {
LAST_VL_VECTOR_OP = RI_VUNZIP2B_VL,
+ // XRivosVisni
+ // VEXTRACT matches the semantics of ri.vextract.x.v. The result is always
+ // XLenVT sign extended from the vector element size. VEXTRACT does *not*
+ // have a VL operand.
+ RI_VEXTRACT,
+
// Read VLENB CSR
READ_VLENB,
// Reads value of CSR.
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index 2247610c21ffb..928b9c1f5b5ba 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -94,6 +94,14 @@ static bool isFloatScalarMoveOrScalarSplatInstr(const MachineInstr &MI) {
}
}
+static bool isVInsertInstr(const MachineInstr &MI) {
+ return RISCV::getRVVMCOpcode(MI.getOpcode()) == RISCV::RI_VINSERT;
+}
+
+static bool isVExtractInstr(const MachineInstr &MI) {
+ return RISCV::getRVVMCOpcode(MI.getOpcode()) == RISCV::RI_VEXTRACT;
+}
+
static bool isScalarExtractInstr(const MachineInstr &MI) {
switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
default:
@@ -538,6 +546,18 @@ DemandedFields getDemanded(const MachineInstr &MI, const RISCVSubtarget *ST) {
Res.MaskPolicy = false;
}
+ if (isVExtractInstr(MI)) {
+ assert(!RISCVII::hasVLOp(TSFlags));
+ // TODO: LMUL can be any larger value (without cost)
+ Res.TailPolicy = false;
+ Res.MaskPolicy = false;
+ }
+
+ if (isVInsertInstr(MI)) {
+ // TODO: LMUL can be any larger value (without cost)
+ Res.MaskPolicy = false;
+ }
+
return Res;
}
@@ -1085,7 +1105,7 @@ RISCVInsertVSETVLI::computeInfoForInstr(const MachineInstr &MI) const {
InstrInfo.setAVLRegDef(VNI, VLOp.getReg());
}
} else {
- assert(isScalarExtractInstr(MI));
+ assert(isScalarExtractInstr(MI) || isVExtractInstr(MI));
// Pick a random value for state tracking purposes, will be ignored via
// the demanded fields mechanism
InstrInfo.setAVLImm(1);
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXRivos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXRivos.td
index 147f89850765a..2597c9d3a7067 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXRivos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXRivos.td
@@ -125,3 +125,58 @@ def RI_VEXTRACT : CustomRivosXVI<0b010111, OPMVV, (outs GPR:$rd),
(ins VR:$vs2, uimm5:$imm),
"ri.vextract.x.v", "$rd, $vs2, $imm">;
}
+
+
+def ri_vextract : SDNode<"RISCVISD::RI_VEXTRACT",
+ SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisVec<1>,
+ SDTCisInt<2>,
+ SDTCisInt<1>]>>;
+
+def ri_vinsert_vl : SDNode<"RISCVISD::RI_VINSERT_VL",
+ SDTypeProfile<1, 5, [SDTCisSameAs<0, 1>,
+ SDTCisInt<0>,
+ SDTCisVT<2, XLenVT>,
+ SDTCisVT<3, XLenVT>,
+ SDTCisVT<4, XLenVT>]>>;
+
+
+let Predicates = [HasVendorXRivosVisni], mayLoad = 0, mayStore = 0,
+ hasSideEffects = 0, HasSEWOp = 1 in
+foreach m = MxList in {
+ defvar mx = m.MX;
+ let VLMul = m.value in {
+ let BaseInstr = RI_VEXTRACT in
+ def PseudoRI_VEXTRACT_ # mx :
+ Pseudo<(outs GPR:$rd), (ins m.vrclass:$rs2, uimm6:$idx, ixlenimm:$sew),
+ []>,
+ RISCVVPseudo;
+
+ let HasVLOp = 1, BaseInstr = RI_VINSERT, HasVecPolicyOp = 1,
+ Constraints = "$rd = $rs1" in
+ def PseudoRI_VINSERT_ # mx :
+ Pseudo<(outs m.vrclass:$rd),
+ (ins m.vrclass:$rs1, GPR:$rs2, uimm5:$idx, AVL:$vl,
+ ixlenimm:$sew, ixlenimm:$policy),
+ []>,
+ RISCVVPseudo;
+ }
+}
+
+
+
+foreach vti = AllIntegerVectors in
+ let Predicates = GetVTypePredicates<vti>.Predicates in {
+ def : Pat<(XLenVT (ri_vextract (vti.Vector vti.RegClass:$vs2), uimm5:$imm)),
+ (!cast<Instruction>("PseudoRI_VEXTRACT_" # vti.LMul.MX)
+ $vs2, uimm5:$imm, vti.Log2SEW)>;
+
+ def : Pat<(vti.Vector (ri_vinsert_vl (vti.Vector vti.RegClass:$merge),
+ vti.ScalarRegClass:$rs1,
+ uimm5:$imm,
+ VLOpFrag,
+ (XLenVT timm:$policy))),
+ (!cast<Instruction>("PseudoRI_VINSERT_" # vti.LMul.MX)
+ $merge, vti.ScalarRegClass:$rs1, uimm5:$imm,
+ GPR:$vl, vti.Log2SEW, (XLenVT timm:$policy))>;
+
+ }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
index 7e45136372b6c..75732fe2f7e65 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
@@ -7,6 +7,8 @@
; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+f,+d,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV32,RV32M
; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+f,+d,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV64,RV64M
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfh,+zfbfmin,+zvfbfmin,+f,+d,+m,+experimental-xrivosvisni -verify-machineinstrs < %s | FileCheck %s --check-prefixes=VISNI
+
define i8 @extractelt_v16i8(<16 x i8> %a) nounwind {
; CHECK-LABEL: extractelt_v16i8:
; CHECK: # %bb.0:
@@ -14,6 +16,12 @@ define i8 @extractelt_v16i8(<16 x i8> %a) nounwind {
; CHECK-NEXT: vslidedown.vi v8, v8, 7
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
+;
+; VISNI-LABEL: extractelt_v16i8:
+; VISNI: # %bb.0:
+; VISNI-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; VISNI-NEXT: ri.vextract.x.v a0, v8, 7
+; VISNI-NEXT: ret
%b = extractelement <16 x i8> %a, i32 7
ret i8 %b
}
@@ -25,6 +33,12 @@ define i16 @extractelt_v8i16(<8 x i16> %a) nounwind {
; CHECK-NEXT: vslidedown.vi v8, v8, 7
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
+;
+; VISNI-LABEL: extractelt_v8i16:
+; VISNI: # %bb.0:
+; VISNI-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; VISNI-NEXT: ri.vextract.x.v a0, v8, 7
+; VISNI-NEXT: ret
%b = extractelement <8 x i16> %a, i32 7
ret i16 %b
}
@@ -36,6 +50,12 @@ define i32 @extractelt_v4i32(<4 x i32> %a) nounwind {
; CHECK-NEXT: vslidedown.vi v8, v8, 2
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
+;
+; VISNI-LABEL: extractelt_v4i32:
+; VISNI: # %bb.0:
+; VISNI-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; VISNI-NEXT: ri.vextract.x.v a0, v8, 2
+; VISNI-NEXT: ret
%b = extractelement <4 x i32> %a, i32 2
ret i32 %b
}
@@ -55,6 +75,12 @@ define i64 @extractelt_v2i64(<2 x i64> %a) nounwind {
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
+;
+; VISNI-LABEL: extractelt_v2i64:
+; VISNI: # %bb.0:
+; VISNI-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; VISNI-NEXT: vmv.x.s a0, v8
+; VISNI-NEXT: ret
%b = extractelement <2 x i64> %a, i32 0
ret i64 %b
}
@@ -67,6 +93,13 @@ define bfloat @extractelt_v8bf16(<8 x bfloat> %a) nounwind {
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: fmv.h.x fa0, a0
; CHECK-NEXT: ret
+;
+; VISNI-LABEL: extractelt_v8bf16:
+; VISNI: # %bb.0:
+; VISNI-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; VISNI-NEXT: ri.vextract.x.v a0, v8, 7
+; VISNI-NEXT: fmv.h.x fa0, a0
+; VISNI-NEXT: ret
%b = extractelement <8 x bfloat> %a, i32 7
ret bfloat %b
}
@@ -86,6 +119,13 @@ define half @extractelt_v8f16(<8 x half> %a) nounwind {
; ZVFHMIN-NEXT: vmv.x.s a0, v8
; ZVFHMIN-NEXT: fmv.h.x fa0, a0
; ZVFHMIN-NEXT: ret
+;
+; VISNI-LABEL: extractelt_v8f16:
+; VISNI: # %bb.0:
+; VISNI-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; VISNI-NEXT: vslidedown.vi v8, v8, 7
+; VISNI-NEXT: vfmv.f.s fa0, v8
+; VISNI-NEXT: ret
%b = extractelement <8 x half> %a, i32 7
ret half %b
}
@@ -97,6 +137,13 @@ define float @extractelt_v4f32(<4 x float> %a) nounwind {
; CHECK-NEXT: vslidedown.vi v8, v8, 2
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
+;
+; VISNI-LABEL: extractelt_v4f32:
+; VISNI: # %bb.0:
+; VISNI-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; VISNI-NEXT: vslidedown.vi v8, v8, 2
+; VISNI-NEXT: vfmv.f.s fa0, v8
+; VISNI-NEXT: ret
%b = extractelement <4 x float> %a, i32 2
ret float %b
}
@@ -107,6 +154,12 @@ define double @extractelt_v2f64(<2 x double> %a) nounwind {
; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
+;
+; VISNI-LABEL: extractelt_v2f64:
+; VISNI: # %bb.0:
+; VISNI-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; VISNI-NEXT: vfmv.f.s fa0, v8
+; VISNI-NEXT: ret
%b = extractelement <2 x double> %a, i32 0
ret double %b
}
@@ -118,6 +171,12 @@ define i8 @extractelt_v32i8(<32 x i8> %a) nounwind {
; CHECK-NEXT: vslidedown.vi v8, v8, 7
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
+;
+; VISNI-LABEL: extractelt_v32i8:
+; VISNI: # %bb.0:
+; VISNI-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; VISNI-NEXT: ri.vextract.x.v a0, v8, 7
+; VISNI-NEXT: ret
%b = extractelement <32 x i8> %a, i32 7
ret i8 %b
}
@@ -129,6 +188,12 @@ define i16 @extractelt_v16i16(<16 x i16> %a) nounwind {
; CHECK-NEXT: vslidedown.vi v8, v8, 7
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
+;
+; VISNI-LABEL: extractelt_v16i16:
+; VISNI: # %bb.0:
+; VISNI-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; VISNI-NEXT: ri.vextract.x.v a0, v8, 7
+; VISNI-NEXT: ret
%b = extractelement <16 x i16> %a, i32 7
ret i16 %b
}
@@ -140,6 +205,12 @@ define i32 @extractelt_v8i32(<8 x i32> %a) nounwind {
; CHECK-NEXT: vslidedown.vi v8, v8, 6
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
+;
+; VISNI-LABEL: extractelt_v8i32:
+; VISNI: # %bb.0:
+; VISNI-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; VISNI-NEXT: ri.vextract.x.v a0, v8, 6
+; VISNI-NEXT: ret
%b = extractelement <8 x i32> %a, i32 6
ret i32 %b
}
@@ -161,6 +232,12 @@ define i64 @extractelt_v4i64(<4 x i64> %a) nounwind {
; RV64-NEXT: vslidedown.vi v8, v8, 3
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
+;
+; VISNI-LABEL: extractelt_v4i64:
+; VISNI: # %bb.0:
+; VISNI-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; VISNI-NEXT: ri.vextract.x.v a0, v8, 3
+; VISNI-NEXT: ret
%b = extractelement <4 x i64> %a, i32 3
ret i64 %b
}
@@ -173,6 +250,13 @@ define bfloat @extractelt_v16bf16(<16 x bfloat> %a) nounwind {
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: fmv.h.x fa0, a0
; CHECK-NEXT: ret
+;
+; VISNI-LABEL: extractelt_v16bf16:
+; VISNI: # %bb.0:
+; VISNI-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; VISNI-NEXT: ri.vextract.x.v a0, v8, 7
+; VISNI-NEXT: fmv.h.x fa0, a0
+; VISNI-NEXT: ret
%b = extractelement <16 x bfloat> %a, i32 7
ret bfloat %b
}
@@ -192,6 +276,13 @@ define half @extractelt_v16f16(<16 x half> %a) nounwind {
; ZVFHMIN-NEXT: vmv.x.s a0, v8
; ZVFHMIN-NEXT: fmv.h.x fa0, a0
; ZVFHMIN-NEXT: ret
+;
+; VISNI-LABEL: extractelt_v16f16:
+; VISNI: # %bb.0:
+; VISNI-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; VISNI-NEXT: vslidedown.vi v8, v8, 7
+; VISNI-NEXT: vfmv.f.s fa0, v8
+; VISNI-NEXT: ret
%b = extractelement <16 x half> %a, i32 7
ret half %b
}
@@ -203,6 +294,13 @@ define float @extractelt_v8f32(<8 x float> %a) nounwind {
; CHECK-NEXT: vslidedown.vi v8, v8, 2
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
+;
+; VISNI-LABEL: extractelt_v8f32:
+; VISNI: # %bb.0:
+; VISNI-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; VISNI-NEXT: vslidedown.vi v8, v8, 2
+; VISNI-NEXT: vfmv.f.s fa0, v8
+; VISNI-NEXT: ret
%b = extractelement <8 x float> %a, i32 2
ret float %b
}
@@ -213,6 +311,12 @@ define double @extractelt_v4f64(<4 x double> %a) nounwind {
; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
+;
+; VISNI-LABEL: extractelt_v4f64:
+; VISNI: # %bb.0:
+; VISNI-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; VISNI-NEXT: vfmv.f.s fa0, v8
+; VISNI-NEXT: ret
%b = extractelement <4 x double> %a, i32 0
ret double %b
}
@@ -237,6 +341,12 @@ define i64 @extractelt_v3i64(<3 x i64> %a) nounwind {
; RV64-NEXT: vslidedown.vi v8, v8, 2
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
+;
+; VISNI-LABEL: extractelt_v3i64:
+; VISNI: # %bb.0:
+; VISNI-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; VISNI-NEXT: ri.vextract.x.v a0, v8, 2
+; VISNI-NEXT: ret
%b = extractelement <3 x i64> %a, i32 2
ret i64 %b
}
@@ -278,6 +388,12 @@ define i32 @extractelt_v32i32(<32 x i32> %a) nounwind {
; RV64-NEXT: ld s0, 240(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 256
; RV64-NEXT: ret
+;
+; VISNI-LABEL: extractelt_v32i32:
+; VISNI: # %bb.0:
+; VISNI-NEXT: vsetivli zero, 1, e32, m8, ta, ma
+; VISNI-NEXT: ri.vextract.x.v a0, v8, 31
+; VISNI-NEXT: ret
%b = extractelement <32 x i32> %a, i32 31
ret i32 %b
}
@@ -319,6 +435,12 @@ define i32 @extractelt_v64i32(<64 x i32> %a) nounwind {
; RV64-NEXT: ld s0, 240(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 256
; RV64-NEXT: ret
+;
+; VISNI-LABEL: extractelt_v64i32:
+; VISNI: # %bb.0:
+; VISNI-NEXT: vsetivli zero, 1, e32, m8, ta, ma
+; VISNI-NEXT: ri.vextract.x.v a0, v16, 31
+; VISNI-NEXT: ret
%b = extractelement <64 x i32> %a, i32 63
ret i32 %b
}
@@ -330,6 +452,13 @@ define i8 @extractelt_v16i8_idx(<16 x i8> %a, i32 zeroext %idx) nounwind {
; CHECK-NEXT: vslidedown.vx v8, v8, a0
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
+;
+; VISNI-LABEL: extractelt_v16i8_idx:
+; VISNI: # %bb.0:
+; VISNI-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; VISNI-NEXT: vslidedown.vx v8, v8, a0
+; VISNI-NEXT: vmv.x.s a0, v8
+; VISNI-NEXT: ret
%b = extractelement <16 x i8> %a, i32 %idx
ret i8 %b
}
@@ -341,6 +470,13 @@ define i16 @extractelt_v8i16_idx(<8 x i16> %a, i32 zeroext %idx) nounwind {
; CHECK-NEXT: vslidedown.vx v8, v8, a0
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
+;
+; VISNI-LABEL: extractelt_v8i16_idx:
+; VISNI: # %bb.0:
+; VISNI-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; VISNI-NEXT: vslidedown.vx v8, v8, a0
+; VISNI-NEXT: vmv.x.s a0, v8
+; VISNI-NEXT: ret
%b = extractelement <8 x i16> %a, i32 %idx
ret i16 %b
}
@@ -353,6 +489,14 @@ define i32 @extractelt_v4i32_idx(<4 x i32> %a, i32 zeroext %idx) nounwind {
; CHECK-NEXT: vslidedown.vx v8, v8, a0
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
+;
+; VISNI-LABEL: extractelt_v4i32_idx:
+; VISNI: # %bb.0:
+; VISNI-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; VISNI-NEXT: vadd.vv v8, v8, v8
+; VISNI-NEXT: vslidedown.vx v8, v8, a0
+; VISNI-NEXT: vmv.x.s a0, v8
+; VISNI-NEXT: ret
%b = add <4 x i32> %a, %a
%c = extractelement <4 x i32> %b, i32 %idx
ret i32 %c
@@ -378,6 +522,14 @@ define i64 @extractelt_v2i64_idx(<2 x i64> %a, i32 zeroext %idx) nounwind {
; RV64-NEXT: vslidedown.vx v8, v8, a0
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
+;
+; VISNI-LABEL: extractelt_v2i64_idx:
+; VISNI: # %bb.0:
+; VISNI-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; VISNI-NEXT: vadd.vv v8, v8, v8
+; VISNI-NEXT: vslide...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/136708
More information about the llvm-commits mailing list