[llvm] 0803dba - [RISCV] Add fixed-length vector intrinsics for segment load
via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 10 00:33:36 PST 2022
Author: Luke
Date: 2022-03-10T16:23:40+08:00
New Revision: 0803dba7dd998ad073d75a32b65296734c10ae70
URL: https://github.com/llvm/llvm-project/commit/0803dba7dd998ad073d75a32b65296734c10ae70
DIFF: https://github.com/llvm/llvm-project/commit/0803dba7dd998ad073d75a32b65296734c10ae70.diff
LOG: [RISCV] Add fixed-length vector intrinsics for segment load
Inspired by reviews.llvm.org/D107790.
Reviewed By: craig.topper
Differential Revision: https://reviews.llvm.org/D119834
Added:
llvm/test/CodeGen/RISCV/rvv/fixed-vector-segN-load.ll
Modified:
llvm/include/llvm/IR/IntrinsicsRISCV.td
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td
index b6a0c0b6c8ba5..652ae976e85d7 100644
--- a/llvm/include/llvm/IR/IntrinsicsRISCV.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -1499,6 +1499,16 @@ let TargetPrefix = "riscv" in {
[llvm_anyvector_ty, llvm_anyptr_ty,
llvm_anyint_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
[NoCapture<ArgIndex<1>>, IntrWriteMem]>;
+
+ // Segment loads for fixed vectors.
+ foreach nf = [2, 3, 4, 5, 6, 7, 8] in {
+ def int_riscv_seg # nf # _load
+ : Intrinsic<!listconcat([llvm_anyvector_ty], !listsplat(LLVMMatchType<0>,
+ !add(nf, -1))),
+ [llvm_anyptr_ty, llvm_anyint_ty],
+ [NoCapture<ArgIndex<0>>, IntrReadMem]>;
+ }
+
} // TargetPrefix = "riscv"
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 589c26cc0ba5b..9a15e98ff9909 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1126,6 +1126,24 @@ bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.size = MemoryLocation::UnknownSize;
Info.flags |= MachineMemOperand::MOStore;
return true;
+ case Intrinsic::riscv_seg2_load:
+ case Intrinsic::riscv_seg3_load:
+ case Intrinsic::riscv_seg4_load:
+ case Intrinsic::riscv_seg5_load:
+ case Intrinsic::riscv_seg6_load:
+ case Intrinsic::riscv_seg7_load:
+ case Intrinsic::riscv_seg8_load:
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.ptrVal = I.getArgOperand(0);
+ Info.memVT =
+ getValueType(DL, I.getType()->getStructElementType(0)->getScalarType());
+ Info.align =
+ Align(DL.getTypeSizeInBits(
+ I.getType()->getStructElementType(0)->getScalarType()) /
+ 8);
+ Info.size = MemoryLocation::UnknownSize;
+ Info.flags |= MachineMemOperand::MOLoad;
+ return true;
}
}
@@ -4878,6 +4896,42 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
return DAG.getMergeValues({Result, Chain}, DL);
}
+ case Intrinsic::riscv_seg2_load:
+ case Intrinsic::riscv_seg3_load:
+ case Intrinsic::riscv_seg4_load:
+ case Intrinsic::riscv_seg5_load:
+ case Intrinsic::riscv_seg6_load:
+ case Intrinsic::riscv_seg7_load:
+ case Intrinsic::riscv_seg8_load: {
+ SDLoc DL(Op);
+ static const Intrinsic::ID VlsegInts[7] = {
+ Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
+ Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
+ Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
+ Intrinsic::riscv_vlseg8};
+ unsigned NF = Op->getNumValues() - 1;
+ assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
+ MVT XLenVT = Subtarget.getXLenVT();
+ MVT VT = Op->getSimpleValueType(0);
+ MVT ContainerVT = getContainerForFixedLengthVector(VT);
+
+ SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
+ SDValue IntID = DAG.getTargetConstant(VlsegInts[NF - 2], DL, XLenVT);
+ auto *Load = cast<MemIntrinsicSDNode>(Op);
+ SmallVector<EVT, 9> ContainerVTs(NF, ContainerVT);
+ ContainerVTs.push_back(MVT::Other);
+ SDVTList VTs = DAG.getVTList(ContainerVTs);
+ SDValue Result =
+ DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs,
+ {Load->getChain(), IntID, Op.getOperand(2), VL},
+ Load->getMemoryVT(), Load->getMemOperand());
+ SmallVector<SDValue, 9> Results;
+ for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++)
+ Results.push_back(convertFromScalableVector(VT, Result.getValue(RetIdx),
+ DAG, Subtarget));
+ Results.push_back(Result.getValue(NF));
+ return DAG.getMergeValues(Results, DL);
+ }
}
return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-segN-load.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-segN-load.ll
new file mode 100644
index 0000000000000..c6edc79f96368
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-segN-load.ll
@@ -0,0 +1,129 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple riscv64 -mattr=+zve64x -riscv-v-vector-bits-min=128 < %s \
+; RUN: | FileCheck %s
+
+define <8 x i8> @load_factor2(<16 x i8>* %ptr) {
+; CHECK-LABEL: load_factor2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT: vlseg2e8.v v7, (a0)
+; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v7_v8
+; CHECK-NEXT: ret
+ %1 = bitcast <16 x i8>* %ptr to i8*
+ %2 = call { <8 x i8>, <8 x i8> } @llvm.riscv.seg2.load.v8i8.p0i8.i64(i8* %1, i64 8)
+ %3 = extractvalue { <8 x i8>, <8 x i8> } %2, 0
+ %4 = extractvalue { <8 x i8>, <8 x i8> } %2, 1
+ ret <8 x i8> %4
+}
+
+define <8 x i8> @load_factor3(<24 x i8>* %ptr) {
+; CHECK-LABEL: load_factor3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT: vlseg3e8.v v6, (a0)
+; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v6_v7_v8
+; CHECK-NEXT: ret
+ %1 = bitcast <24 x i8>* %ptr to i8*
+ %2 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg3.load.v8i8.p0i8.i64(i8* %1, i64 8)
+ %3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %2, 0
+ %4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %2, 1
+ %5 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %2, 2
+ ret <8 x i8> %5
+}
+
+define <8 x i8> @load_factor4(<32 x i8>* %ptr) {
+; CHECK-LABEL: load_factor4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT: vlseg4e8.v v5, (a0)
+; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v5_v6_v7_v8
+; CHECK-NEXT: ret
+ %1 = bitcast <32 x i8>* %ptr to i8*
+ %2 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg4.load.v8i8.p0i8.i64(i8* %1, i64 8)
+ %3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 0
+ %4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 1
+ %5 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 2
+ %6 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 3
+ ret <8 x i8> %6
+}
+
+define <8 x i8> @load_factor5(<40 x i8>* %ptr) {
+; CHECK-LABEL: load_factor5:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT: vlseg5e8.v v4, (a0)
+; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v4_v5_v6_v7_v8
+; CHECK-NEXT: ret
+ %1 = bitcast <40 x i8>* %ptr to i8*
+ %2 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg5.load.v8i8.p0i8.i64(i8* %1, i64 8)
+ %3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 0
+ %4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 1
+ %5 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 2
+ %6 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 3
+ %7 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 4
+ ret <8 x i8> %7
+}
+
+define <8 x i8> @load_factor6(<48 x i8>* %ptr) {
+; CHECK-LABEL: load_factor6:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT: vlseg6e8.v v3, (a0)
+; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v3_v4_v5_v6_v7_v8
+; CHECK-NEXT: ret
+ %1 = bitcast <48 x i8>* %ptr to i8*
+ %2 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg6.load.v8i8.p0i8.i64(i8* %1, i64 8)
+ %3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 0
+ %4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 1
+ %5 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 2
+ %6 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 3
+ %7 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 4
+ %8 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 5
+ ret <8 x i8> %8
+}
+
+define <8 x i8> @load_factor7(<56 x i8>* %ptr) {
+; CHECK-LABEL: load_factor7:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT: vlseg7e8.v v2, (a0)
+; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v2_v3_v4_v5_v6_v7_v8
+; CHECK-NEXT: ret
+ %1 = bitcast <56 x i8>* %ptr to i8*
+ %2 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg7.load.v8i8.p0i8.i64(i8* %1, i64 8)
+ %3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 0
+ %4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 1
+ %5 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 2
+ %6 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 3
+ %7 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 4
+ %8 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 5
+ %9 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 6
+ ret <8 x i8> %9
+}
+
+define <8 x i8> @load_factor8(<64 x i8>* %ptr) {
+; CHECK-LABEL: load_factor8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT: vlseg8e8.v v1, (a0)
+; CHECK-NEXT: # kill: def $v8 killed $v8 killed $v1_v2_v3_v4_v5_v6_v7_v8
+; CHECK-NEXT: ret
+ %1 = bitcast <64 x i8>* %ptr to i8*
+ %2 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg8.load.v8i8.p0i8.i64(i8* %1, i64 8)
+ %3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 0
+ %4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 1
+ %5 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 2
+ %6 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 3
+ %7 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 4
+ %8 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 5
+ %9 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 6
+ %10 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %2, 7
+ ret <8 x i8> %10
+}
+declare { <8 x i8>, <8 x i8> } @llvm.riscv.seg2.load.v8i8.p0i8.i64(i8*, i64)
+declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg3.load.v8i8.p0i8.i64(i8*, i64)
+declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg4.load.v8i8.p0i8.i64(i8*, i64)
+declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg5.load.v8i8.p0i8.i64(i8*, i64)
+declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg6.load.v8i8.p0i8.i64(i8*, i64)
+declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg7.load.v8i8.p0i8.i64(i8*, i64)
+declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg8.load.v8i8.p0i8.i64(i8*, i64)
More information about the llvm-commits
mailing list