[llvm] 8b10ffa - [RISCV] Disable <vscale x 1 x *> types with Zve32x or Zve32f.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 23 08:49:31 PDT 2022
Author: Craig Topper
Date: 2022-06-23T08:49:18-07:00
New Revision: 8b10ffabae48ae6eae5ece711c13b93f9c232515
URL: https://github.com/llvm/llvm-project/commit/8b10ffabae48ae6eae5ece711c13b93f9c232515
DIFF: https://github.com/llvm/llvm-project/commit/8b10ffabae48ae6eae5ece711c13b93f9c232515.diff
LOG: [RISCV] Disable <vscale x 1 x *> types with Zve32x or Zve32f.
According to the vector spec, mf8 is not supported for i8 if ELEN
is 32. Similarly, mf4 is not supported for i16/f16, nor mf2 for i32/f32.
Since RVVBitsPerBlock is 64 and LMUL is calculated as
((MinNumElements * ElementSize) / RVVBitsPerBlock), this means we
need to disable any type with MinNumElements==1.
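To make the arithmetic concrete, here is a small standalone sketch of the
check with the Zve32 numbers plugged in. The helper name and layout are
mine for illustration, not the LLVM code in the patch below:

  // Sketch only: with Zve32*, ELEN == 32 and RVVBitsPerBlock == 64, so the
  // smallest allowed scalable type must have at least 64/32 == 2 elements
  // per vscale chunk.
  #include <cassert>

  constexpr unsigned RVVBitsPerBlock = 64;

  bool isRVVTypeAllowed(unsigned MinNumElements, unsigned ELEN) {
    unsigned MinElts = RVVBitsPerBlock / ELEN;
    return MinNumElements >= MinElts;
  }

  int main() {
    // Zve32x/Zve32f: ELEN == 32.
    assert(!isRVVTypeAllowed(/*MinNumElements=*/1, /*ELEN=*/32)); // <vscale x 1 x i8> rejected
    assert(isRVVTypeAllowed(/*MinNumElements=*/2, /*ELEN=*/32));  // <vscale x 2 x i8> (mf4) still legal
    // Full V: ELEN == 64, so <vscale x 1 x i8> (mf8) remains available.
    assert(isRVVTypeAllowed(/*MinNumElements=*/1, /*ELEN=*/64));
  }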
For generic IR, these types will now be widened in type legalization.
For RVV intrinsics, we'll probably hit a fatal error somewhere. I plan
to work on disabling the intrinsics in the riscv_vector.h header.
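For illustration only, the kind of source the planned riscv_vector.h work
would have to reject for zve32x is a kernel built on an mf8 type, which has
no legal <vscale x 1 x i8> backend type after this change. The snippet uses
the intrinsic spelling from the spec at the time (no __riscv_ prefix); treat
the exact names as an assumption, not part of this patch:

  // Illustration: with -march=rv64izve32x there is no SEW=8/LMUL=mf8
  // configuration, so this maps to a now-disabled <vscale x 1 x i8> type.
  #include <riscv_vector.h>
  #include <stddef.h>

  vint8mf8_t add_i8mf8(vint8mf8_t a, vint8mf8_t b, size_t vl) {
    return vadd_vv_i8mf8(a, b, vl);
  }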
Reviewed By: arcbbb
Differential Revision: https://reviews.llvm.org/D128286
Added:
llvm/test/CodeGen/RISCV/rvv/zve32-types.ll
Modified:
llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 999d25581ba6c..d497c1803d7bf 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -4015,7 +4015,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
EVT WidenEltVT = WidenVT.getVectorElementType();
EVT VT = WidenVT;
- unsigned NumElts = VT.getVectorNumElements();
+ unsigned NumElts = VT.getVectorMinNumElements();
const SDNodeFlags Flags = N->getFlags();
while (!TLI.isTypeLegal(VT) && NumElts != 1) {
NumElts = NumElts / 2;
@@ -4029,6 +4029,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, Flags);
}
+ // FIXME: Improve support for scalable vectors.
+ assert(!VT.isScalableVector() && "Scalable vectors not handled yet.");
+
// No legal vector version so unroll the vector operation and then widen.
if (NumElts == 1)
return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 5ef1a6cc38d4a..0a4cd380fb02f 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -112,6 +112,12 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
if (Subtarget.hasVInstructions()) {
auto addRegClassForRVV = [this](MVT VT) {
+ // Disable the smallest fractional LMUL types if ELEN is less than
+ // RVVBitsPerBlock.
+ unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELEN();
+ if (VT.getVectorMinNumElements() < MinElts)
+ return;
+
unsigned Size = VT.getSizeInBits().getKnownMinValue();
const TargetRegisterClass *RC;
if (Size <= RISCV::RVVBitsPerBlock)
@@ -472,6 +478,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
}
for (MVT VT : BoolVecVTs) {
+ if (!isTypeLegal(VT))
+ continue;
+
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
// Mask VTs are custom-expanded into a series of standard nodes
@@ -519,8 +528,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
}
for (MVT VT : IntVecVTs) {
- if (VT.getVectorElementType() == MVT::i64 &&
- !Subtarget.hasVInstructionsI64())
+ if (!isTypeLegal(VT))
continue;
setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
@@ -704,21 +712,31 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
}
};
- if (Subtarget.hasVInstructionsF16())
- for (MVT VT : F16VecVTs)
+ if (Subtarget.hasVInstructionsF16()) {
+ for (MVT VT : F16VecVTs) {
+ if (!isTypeLegal(VT))
+ continue;
SetCommonVFPActions(VT);
+ }
+ }
- for (MVT VT : F32VecVTs) {
- if (Subtarget.hasVInstructionsF32())
+ if (Subtarget.hasVInstructionsF32()) {
+ for (MVT VT : F32VecVTs) {
+ if (!isTypeLegal(VT))
+ continue;
SetCommonVFPActions(VT);
- SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
+ SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
+ }
}
- for (MVT VT : F64VecVTs) {
- if (Subtarget.hasVInstructionsF64())
+ if (Subtarget.hasVInstructionsF64()) {
+ for (MVT VT : F64VecVTs) {
+ if (!isTypeLegal(VT))
+ continue;
SetCommonVFPActions(VT);
- SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
- SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
+ SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
+ SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
+ }
}
if (Subtarget.useRVVForFixedLengthVectors()) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/zve32-types.ll b/llvm/test/CodeGen/RISCV/rvv/zve32-types.ll
new file mode 100644
index 0000000000000..5648199bb2c7b
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/zve32-types.ll
@@ -0,0 +1,91 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+zve32f,+f,+zvl64b \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+zve32f,+f,+zvl64b \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
+
+; Sanity check that type legalization kicks in for vscale x 1 types with Zve32.
+
+; NOTE: The load and store are widened by using VP_LOAD/STORE. The add/fadd are
+; widened by using the next larger LMUL and operating on the whole vector. This
+; isn't optimal, but doesn't crash.
+
+define void @vadd_vv_nxv1i8(<vscale x 1 x i8>* %pa, <vscale x 1 x i8>* %pb) {
+; CHECK-LABEL: vadd_vv_nxv1i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: srli a2, a2, 3
+; CHECK-NEXT: vsetvli zero, a2, e8, mf4, ta, mu
+; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: vle8.v v9, (a1)
+; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, mu
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: vsetvli zero, a2, e8, mf4, ta, mu
+; CHECK-NEXT: vse8.v v8, (a0)
+; CHECK-NEXT: ret
+ %va = load <vscale x 1 x i8>, <vscale x 1 x i8>* %pa
+ %vb = load <vscale x 1 x i8>, <vscale x 1 x i8>* %pb
+ %vc = add <vscale x 1 x i8> %va, %vb
+ store <vscale x 1 x i8> %vc, <vscale x 1 x i8>* %pa
+ ret void
+}
+
+define void @vadd_vv_nxv1i16(<vscale x 1 x i16>* %pa, <vscale x 1 x i16>* %pb) {
+; CHECK-LABEL: vadd_vv_nxv1i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: srli a2, a2, 3
+; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, mu
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vle16.v v9, (a1)
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, mu
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, mu
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
+ %va = load <vscale x 1 x i16>, <vscale x 1 x i16>* %pa
+ %vb = load <vscale x 1 x i16>, <vscale x 1 x i16>* %pb
+ %vc = add <vscale x 1 x i16> %va, %vb
+ store <vscale x 1 x i16> %vc, <vscale x 1 x i16>* %pa
+ ret void
+}
+
+define void @vadd_vv_nxv1i32(<vscale x 1 x i32>* %pa, <vscale x 1 x i32>* %pb) {
+; CHECK-LABEL: vadd_vv_nxv1i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: srli a2, a2, 3
+; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: vle32.v v9, (a1)
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, mu
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu
+; CHECK-NEXT: vse32.v v8, (a0)
+; CHECK-NEXT: ret
+ %va = load <vscale x 1 x i32>, <vscale x 1 x i32>* %pa
+ %vb = load <vscale x 1 x i32>, <vscale x 1 x i32>* %pb
+ %vc = add <vscale x 1 x i32> %va, %vb
+ store <vscale x 1 x i32> %vc, <vscale x 1 x i32>* %pa
+ ret void
+}
+
+define void @vfadd_vv_nxv1f32(<vscale x 1 x float>* %pa, <vscale x 1 x float>* %pb) {
+; CHECK-LABEL: vfadd_vv_nxv1f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: srli a2, a2, 3
+; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: vle32.v v9, (a1)
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, mu
+; CHECK-NEXT: vfadd.vv v8, v8, v9
+; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu
+; CHECK-NEXT: vse32.v v8, (a0)
+; CHECK-NEXT: ret
+ %va = load <vscale x 1 x float>, <vscale x 1 x float>* %pa
+ %vb = load <vscale x 1 x float>, <vscale x 1 x float>* %pb
+ %vc = fadd <vscale x 1 x float> %va, %vb
+ store <vscale x 1 x float> %vc, <vscale x 1 x float>* %pa
+ ret void
+}