[llvm] 8b10ffa - [RISCV] Disable <vscale x 1 x *> types with Zve32x or Zve32f.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 23 08:49:31 PDT 2022
Author: Craig Topper
Date: 2022-06-23T08:49:18-07:00
New Revision: 8b10ffabae48ae6eae5ece711c13b93f9c232515
URL: https://github.com/llvm/llvm-project/commit/8b10ffabae48ae6eae5ece711c13b93f9c232515
DIFF: https://github.com/llvm/llvm-project/commit/8b10ffabae48ae6eae5ece711c13b93f9c232515.diff
LOG: [RISCV] Disable <vscale x 1 x *> types with Zve32x or Zve32f.
According to the vector spec, mf8 is not supported for i8 if ELEN
is 32. Similarly, mf4 is not supported for i16/f16, nor mf2 for i32/f32.
Since RVVBitsPerBlock is 64 and LMUL is calculated as
((MinNumElements * ElementSize) / RVVBitsPerBlock), this means we
need to disable any type with MinNumElements==1.
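To make the arithmetic concrete, here is a small standalone sketch of the
check with the Zve32 numbers plugged in. The helper name and layout are
mine for illustration, not the LLVM code in the patch below:

  // Sketch only: with Zve32*, ELEN == 32 and RVVBitsPerBlock == 64, so the
  // smallest allowed scalable type must have at least 64/32 == 2 elements
  // per vscale chunk.
  #include <cassert>

  constexpr unsigned RVVBitsPerBlock = 64;

  bool isRVVTypeAllowed(unsigned MinNumElements, unsigned ELEN) {
    unsigned MinElts = RVVBitsPerBlock / ELEN;
    return MinNumElements >= MinElts;
  }

  int main() {
    // Zve32x/Zve32f: ELEN == 32.
    assert(!isRVVTypeAllowed(/*MinNumElements=*/1, /*ELEN=*/32)); // <vscale x 1 x i8> rejected
    assert(isRVVTypeAllowed(/*MinNumElements=*/2, /*ELEN=*/32));  // <vscale x 2 x i8> (mf4) still legal
    // Full V: ELEN == 64, so <vscale x 1 x i8> (mf8) remains available.
    assert(isRVVTypeAllowed(/*MinNumElements=*/1, /*ELEN=*/64));
  }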
For generic IR, these types will now be widened in type legalization.
For RVV intrinsics, we'll probably hit a fatal error somewhere. I plan
to work on disabling the intrinsics in the riscv_vector.h header.
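For illustration only, the kind of source the planned riscv_vector.h work
would have to reject for zve32x is a kernel built on an mf8 type, which has
no legal <vscale x 1 x i8> backend type after this change. The snippet uses
the intrinsic spelling from the spec at the time (no __riscv_ prefix); treat
the exact names as an assumption, not part of this patch:

  // Illustration: with -march=rv64izve32x there is no SEW=8/LMUL=mf8
  // configuration, so this maps to a now-disabled <vscale x 1 x i8> type.
  #include <riscv_vector.h>
  #include <stddef.h>

  vint8mf8_t add_i8mf8(vint8mf8_t a, vint8mf8_t b, size_t vl) {
    return vadd_vv_i8mf8(a, b, vl);
  }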
Reviewed By: arcbbb
Differential Revision: https://reviews.llvm.org/D128286
Added:
llvm/test/CodeGen/RISCV/rvv/zve32-types.ll
Modified:
llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 999d25581ba6c..d497c1803d7bf 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -4015,7 +4015,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
EVT WidenEltVT = WidenVT.getVectorElementType();
EVT VT = WidenVT;
- unsigned NumElts = VT.getVectorNumElements();
+ unsigned NumElts = VT.getVectorMinNumElements();
const SDNodeFlags Flags = N->getFlags();
while (!TLI.isTypeLegal(VT) && NumElts != 1) {
NumElts = NumElts / 2;
@@ -4029,6 +4029,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, Flags);
}
+ // FIXME: Improve support for scalable vectors.
+ assert(!VT.isScalableVector() && "Scalable vectors not handled yet.");
+
// No legal vector version so unroll the vector operation and then widen.
if (NumElts == 1)
return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 5ef1a6cc38d4a..0a4cd380fb02f 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -112,6 +112,12 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
if (Subtarget.hasVInstructions()) {
auto addRegClassForRVV = [this](MVT VT) {
+ // Disable the smallest fractional LMUL types if ELEN is less than
+ // RVVBitsPerBlock.
+ unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELEN();
+ if (VT.getVectorMinNumElements() < MinElts)
+ return;
+
unsigned Size = VT.getSizeInBits().getKnownMinValue();
const TargetRegisterClass *RC;
if (Size <= RISCV::RVVBitsPerBlock)
@@ -472,6 +478,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
}
for (MVT VT : BoolVecVTs) {
+ if (!isTypeLegal(VT))
+ continue;
+
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
// Mask VTs are custom-expanded into a series of standard nodes
@@ -519,8 +528,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
}
for (MVT VT : IntVecVTs) {
- if (VT.getVectorElementType() == MVT::i64 &&
- !Subtarget.hasVInstructionsI64())
+ if (!isTypeLegal(VT))
continue;
setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
@@ -704,21 +712,31 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
}
};
- if (Subtarget.hasVInstructionsF16())
- for (MVT VT : F16VecVTs)
+ if (Subtarget.hasVInstructionsF16()) {
+ for (MVT VT : F16VecVTs) {
+ if (!isTypeLegal(VT))
+ continue;
SetCommonVFPActions(VT);
+ }
+ }
- for (MVT VT : F32VecVTs) {
- if (Subtarget.hasVInstructionsF32())
+ if (Subtarget.hasVInstructionsF32()) {
+ for (MVT VT : F32VecVTs) {
+ if (!isTypeLegal(VT))
+ continue;
SetCommonVFPActions(VT);
- SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
+ SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
+ }
}
- for (MVT VT : F64VecVTs) {
- if (Subtarget.hasVInstructionsF64())
+ if (Subtarget.hasVInstructionsF64()) {
+ for (MVT VT : F64VecVTs) {
+ if (!isTypeLegal(VT))
+ continue;
SetCommonVFPActions(VT);
- SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
- SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
+ SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
+ SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
+ }
}
if (Subtarget.useRVVForFixedLengthVectors()) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/zve32-types.ll b/llvm/test/CodeGen/RISCV/rvv/zve32-types.ll
new file mode 100644
index 0000000000000..5648199bb2c7b
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/zve32-types.ll
@@ -0,0 +1,91 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+zve32f,+f,+zvl64b \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+zve32f,+f,+zvl64b \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
+
+; Sanity check that type legalization kicks in for vscale x 1 types with Zve32.
+
+; NOTE: The load and store are widened by using VP_LOAD/STORE. The add/fadd are
+; widened by using the next larger LMUL and operating on the whole vector. This
+; isn't optimal, but doesn't crash.
+
+define void @vadd_vv_nxv1i8(<vscale x 1 x i8>* %pa, <vscale x 1 x i8>* %pb) {
+; CHECK-LABEL: vadd_vv_nxv1i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: srli a2, a2, 3
+; CHECK-NEXT: vsetvli zero, a2, e8, mf4, ta, mu
+; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: vle8.v v9, (a1)
+; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, mu
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: vsetvli zero, a2, e8, mf4, ta, mu
+; CHECK-NEXT: vse8.v v8, (a0)
+; CHECK-NEXT: ret
+ %va = load <vscale x 1 x i8>, <vscale x 1 x i8>* %pa
+ %vb = load <vscale x 1 x i8>, <vscale x 1 x i8>* %pb
+ %vc = add <vscale x 1 x i8> %va, %vb
+ store <vscale x 1 x i8> %vc, <vscale x 1 x i8>* %pa
+ ret void
+}
+
+define void @vadd_vv_nxv1i16(<vscale x 1 x i16>* %pa, <vscale x 1 x i16>* %pb) {
+; CHECK-LABEL: vadd_vv_nxv1i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: srli a2, a2, 3
+; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, mu
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vle16.v v9, (a1)
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, mu
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, mu
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
+ %va = load <vscale x 1 x i16>, <vscale x 1 x i16>* %pa
+ %vb = load <vscale x 1 x i16>, <vscale x 1 x i16>* %pb
+ %vc = add <vscale x 1 x i16> %va, %vb
+ store <vscale x 1 x i16> %vc, <vscale x 1 x i16>* %pa
+ ret void
+}
+
+define void @vadd_vv_nxv1i32(<vscale x 1 x i32>* %pa, <vscale x 1 x i32>* %pb) {
+; CHECK-LABEL: vadd_vv_nxv1i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: srli a2, a2, 3
+; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: vle32.v v9, (a1)
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, mu
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu
+; CHECK-NEXT: vse32.v v8, (a0)
+; CHECK-NEXT: ret
+ %va = load <vscale x 1 x i32>, <vscale x 1 x i32>* %pa
+ %vb = load <vscale x 1 x i32>, <vscale x 1 x i32>* %pb
+ %vc = add <vscale x 1 x i32> %va, %vb
+ store <vscale x 1 x i32> %vc, <vscale x 1 x i32>* %pa
+ ret void
+}
+
+define void @vfadd_vv_nxv1f32(<vscale x 1 x float>* %pa, <vscale x 1 x float>* %pb) {
+; CHECK-LABEL: vfadd_vv_nxv1f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: srli a2, a2, 3
+; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: vle32.v v9, (a1)
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, mu
+; CHECK-NEXT: vfadd.vv v8, v8, v9
+; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu
+; CHECK-NEXT: vse32.v v8, (a0)
+; CHECK-NEXT: ret
+ %va = load <vscale x 1 x float>, <vscale x 1 x float>* %pa
+ %vb = load <vscale x 1 x float>, <vscale x 1 x float>* %pb
+ %vc = fadd <vscale x 1 x float> %va, %vb
+ store <vscale x 1 x float> %vc, <vscale x 1 x float>* %pa
+ ret void
+}