[PATCH] D153380: [RISCV] Stop isInterleaveShuffle from producing illegal extract_subvectors.

Craig Topper via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Tue Jun 20 15:15:16 PDT 2023


craig.topper created this revision.
craig.topper added reviewers: reames, luke, frasercrmck.
Herald added subscribers: jobnoorman, VincentWu, vkmr, luismarques, apazos, sameer.abuasal, s.egerton, Jim, benna, psnobl, jocewei, PkmX, the_o, brucehoult, MartinMosbeck, edward-jones, zzheng, jrtc27, shiva0217, kito-cheng, niosHD, sabuasal, simoncook, johnrusso, rbar, asb, hiraditya, arichardson.
Herald added a project: All.
craig.topper requested review of this revision.
Herald added subscribers: llvm-commits, wangpc, eopXD, MaskRay.
Herald added a project: LLVM.

The definition of ISD::EXTRACT_SUBVECTOR says the index must be a
multiple of the extracted type's known minimum number of elements. We
mostly got away with violating this, but it turns out some places depend
on it.

For example, this code in getNode for ISD::EXTRACT_SUBVECTOR:

  // EXTRACT_SUBVECTOR of CONCAT_VECTOR can be simplified if the pieces of
  // the concat have the same type as the extract.
  if (N1.getOpcode() == ISD::CONCAT_VECTORS && N1.getNumOperands() > 0 &&
      VT == N1.getOperand(0).getValueType()) {
    unsigned Factor = VT.getVectorMinNumElements();
    return N1.getOperand(N2C->getZExtValue() / Factor);
  }

This simplification is only correct when N2C->getZExtValue() is evenly
divisible by Factor; otherwise it returns an operand that does not start
at the requested index.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D153380

Files:
  llvm/lib/Target/RISCV/RISCVISelLowering.cpp
  llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll


Index: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
===================================================================
--- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
+++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
@@ -202,23 +202,28 @@
 define <4 x i32> @interleave_v4i32_offset_1(<4 x i32> %x, <4 x i32> %y) {
 ; V128-LABEL: interleave_v4i32_offset_1:
 ; V128:       # %bb.0:
-; V128-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; V128-NEXT:    vslidedown.vi v10, v9, 1
-; V128-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; V128-NEXT:    vwaddu.vv v9, v8, v10
-; V128-NEXT:    li a0, -1
-; V128-NEXT:    vwmaccu.vx v9, a0, v10
-; V128-NEXT:    vmv1r.v v8, v9
+; V128-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; V128-NEXT:    vid.v v10
+; V128-NEXT:    vsrl.vi v11, v10, 1
+; V128-NEXT:    vrgather.vv v10, v8, v11
+; V128-NEXT:    li a0, 10
+; V128-NEXT:    vmv.s.x v0, a0
+; V128-NEXT:    vadd.vi v8, v11, 1
+; V128-NEXT:    vrgather.vv v10, v9, v8, v0.t
+; V128-NEXT:    vmv.v.v v8, v10
 ; V128-NEXT:    ret
 ;
 ; V512-LABEL: interleave_v4i32_offset_1:
 ; V512:       # %bb.0:
-; V512-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; V512-NEXT:    vslidedown.vi v10, v9, 1
-; V512-NEXT:    vwaddu.vv v9, v8, v10
-; V512-NEXT:    li a0, -1
-; V512-NEXT:    vwmaccu.vx v9, a0, v10
-; V512-NEXT:    vmv1r.v v8, v9
+; V512-NEXT:    vsetivli zero, 4, e32, mf2, ta, mu
+; V512-NEXT:    vid.v v10
+; V512-NEXT:    vsrl.vi v11, v10, 1
+; V512-NEXT:    vrgather.vv v10, v8, v11
+; V512-NEXT:    li a0, 10
+; V512-NEXT:    vmv.s.x v0, a0
+; V512-NEXT:    vadd.vi v8, v11, 1
+; V512-NEXT:    vrgather.vv v10, v9, v8, v0.t
+; V512-NEXT:    vmv1r.v v8, v10
 ; V512-NEXT:    ret
   %a = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 5, i32 1, i32 6>
   ret <4 x i32> %a
@@ -762,22 +767,19 @@
 define <4 x i8> @unary_interleave_10uu_v4i8(<4 x i8> %x) {
 ; V128-LABEL: unary_interleave_10uu_v4i8:
 ; V128:       # %bb.0:
-; V128-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
-; V128-NEXT:    vslidedown.vi v10, v8, 1
-; V128-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
-; V128-NEXT:    vwaddu.vv v9, v10, v8
-; V128-NEXT:    li a0, -1
-; V128-NEXT:    vwmaccu.vx v9, a0, v8
+; V128-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
+; V128-NEXT:    vid.v v9
+; V128-NEXT:    vrsub.vi v10, v9, 1
+; V128-NEXT:    vrgather.vv v9, v8, v10
 ; V128-NEXT:    vmv1r.v v8, v9
 ; V128-NEXT:    ret
 ;
 ; V512-LABEL: unary_interleave_10uu_v4i8:
 ; V512:       # %bb.0:
-; V512-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
-; V512-NEXT:    vslidedown.vi v10, v8, 1
-; V512-NEXT:    vwaddu.vv v9, v10, v8
-; V512-NEXT:    li a0, -1
-; V512-NEXT:    vwmaccu.vx v9, a0, v8
+; V512-NEXT:    vsetivli zero, 4, e8, mf8, ta, ma
+; V512-NEXT:    vid.v v9
+; V512-NEXT:    vrsub.vi v10, v9, 1
+; V512-NEXT:    vrgather.vv v9, v8, v10
 ; V512-NEXT:    vmv1r.v v8, v9
 ; V512-NEXT:    ret
   %a = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> <i32 1, i32 0, i32 undef, i32 undef>
Index: llvm/lib/Target/RISCV/RISCVISelLowering.cpp
===================================================================
--- llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -3525,10 +3525,11 @@
   // vectors, or at the start and middle of the first vector if it's an unary
   // interleave.
   // In both cases, HalfNumElts will be extracted.
-  // So make sure that EvenSrc/OddSrc are within range.
+  // We need to ensure that the extract index is 0 or HalfNumElts, otherwise
+  // we'll create an illegal extract_subvector.
+  // FIXME: We could support other values using a slidedown first.
   int HalfNumElts = NumElts / 2;
-  return (((EvenSrc % NumElts) + HalfNumElts) <= NumElts) &&
-         (((OddSrc % NumElts) + HalfNumElts) <= NumElts);
+  return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
 }
 
 /// Match shuffles that concatenate two vectors, rotate the concatenation,

