[llvm] [RISCV] Recognize a zipeven/zipodd requiring larger SEW (PR #134923)
Philip Reames via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 8 13:06:41 PDT 2025
https://github.com/preames created https://github.com/llvm/llvm-project/pull/134923
This is a follow-up to f8ee58a3c, and improves code generation for the XRivosVizip extension.
If we have a slide pair which would be a zipeven or zipodd if the shuffle were widened, widen the shuffle and then match the zipeven or zipodd.
This is basically working around a matching-order issue: we match the slide-pair variants before trying widening. I considered simply widening slide pairs without any consideration of the zip idioms, but the resulting codegen changes look mostly like churn, with no clear evidence of profitability.
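To make the generalized check concrete: below is a minimal standalone C++ sketch (not the LLVM code itself) of the Factor-based alternating test, applied to the v8i32-as-v4i64 zipeven mask from the tests in this patch. The Src/Diff derivation and the harness are reconstructions of parts the diff elides; only the (i / Factor) % 2 polarity test and the Factor extraction mirror the patch.

#include <array>
#include <cstdio>
#include <utility>
#include <vector>

// Factor-generalized alternating check, mirroring the patch. Lanes are
// judged in groups of Factor: even groups must come from one slide, odd
// groups from the other, per RequiredPolarity.
static bool isAlternating(const std::array<std::pair<int, int>, 2> &SrcInfo,
                          const std::vector<int> &Mask, int Factor,
                          bool RequiredPolarity) {
  int NumElts = Mask.size();
  for (int i = 0; i != NumElts; ++i) {
    int M = Mask[i];
    if (M < 0)
      continue; // undef lanes are compatible with either slide
    // Assumed reconstruction of the elided part of the real function:
    // which input the lane reads, and the slide distance placing it at i.
    int Src = M >= NumElts ? 1 : 0;
    int Diff = i - (M % NumElts);
    bool C = Src == SrcInfo[1].first && Diff == SrcInfo[1].second;
    if (RequiredPolarity != (C == (i / Factor) % 2))
      return false;
  }
  return true;
}

// Factor falls out of the slide amount itself: a zipeven at 2x SEW slides
// the second source up by 2, at 4x SEW by 4, and so on.
static bool isZipEven(const std::array<std::pair<int, int>, 2> &SrcInfo,
                      const std::vector<int> &Mask, int &Factor) {
  Factor = SrcInfo[1].second;
  return SrcInfo[0].second == 0 && Factor > 0 &&
         (Factor & (Factor - 1)) == 0 && // power of two
         Mask.size() % Factor == 0 &&
         isAlternating(SrcInfo, Mask, Factor, true);
}

int main() {
  // The zipeven_v8i32_as_v4i64 mask from the test diff below: pairs of
  // i32 lanes alternate between the two sources, i.e. a zipeven on i64.
  std::vector<int> Mask = {0, 1, 8, 9, 4, 5, 12, 13};
  std::array<std::pair<int, int>, 2> SrcInfo = {{{0, 0}, {1, 2}}};
  int Factor;
  printf("zipeven? %s (Factor %d)\n",
         isZipEven(SrcInfo, Mask, Factor) ? "yes" : "no", Factor);
}

With Factor == 2 the lanes are judged in pairs, which is exactly the view of the <8 x i32> mask as a <4 x i64> zipeven.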
From d4fc9a5ad9514ec66324c568dcfd3575644a6b00 Mon Sep 17 00:00:00 2001
From: Philip Reames <preames at rivosinc.com>
Date: Mon, 31 Mar 2025 16:02:16 -0700
Subject: [PATCH] [RISCV] Recognize a zipeven/zipodd requiring larger SEW
This is a follow-up to f8ee58a3c, and improves code generation for the
XRivosVizip extension.

If we have a slide pair which would be a zipeven or zipodd if the
shuffle were widened, widen the shuffle and then match the zipeven or
zipodd.

This is basically working around a matching-order issue: we match the
slide-pair variants before trying widening. I considered simply
widening slide pairs without any consideration of the zip idioms, but
the resulting codegen changes look mostly like churn, with no clear
evidence of profitability.
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 58 +++++++++++++------
.../fixed-vectors-shuffle-zipeven-zipodd.ll | 15 ++---
2 files changed, 46 insertions(+), 27 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index e5f37fc218ed2..8eccad097b3cb 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -4635,7 +4635,8 @@ static bool isElementRotate(const std::array<std::pair<int, int>, 2> &SrcInfo,
}
static bool isAlternating(const std::array<std::pair<int, int>, 2> &SrcInfo,
- ArrayRef<int> Mask, bool RequiredPolarity) {
+ ArrayRef<int> Mask, unsigned Factor,
+ bool RequiredPolarity) {
int NumElts = Mask.size();
for (int i = 0; i != NumElts; ++i) {
int M = Mask[i];
@@ -4646,7 +4647,7 @@ static bool isAlternating(const std::array<std::pair<int, int>, 2> &SrcInfo,
bool C = Src == SrcInfo[1].first && Diff == SrcInfo[1].second;
assert(C != (Src == SrcInfo[0].first && Diff == SrcInfo[0].second) &&
"Must match exactly one of the two slides");
- if (RequiredPolarity != (C == i % 2))
+ if (RequiredPolarity != (C == (i / Factor) % 2))
return false;
}
return true;
@@ -4658,9 +4659,11 @@ static bool isAlternating(const std::array<std::pair<int, int>, 2> &SrcInfo,
/// vs1: b0 b1 b2 b3
/// vd: a0 b0 a2 b2
static bool isZipEven(const std::array<std::pair<int, int>, 2> &SrcInfo,
- ArrayRef<int> Mask) {
- return SrcInfo[0].second == 0 && SrcInfo[1].second == 1 &&
- isAlternating(SrcInfo, Mask, true);
+ ArrayRef<int> Mask, unsigned &Factor) {
+ Factor = SrcInfo[1].second;
+ return SrcInfo[0].second == 0 && isPowerOf2_32(Factor) &&
+ Mask.size() % Factor == 0 &&
+ isAlternating(SrcInfo, Mask, Factor, true);
}
/// Given a shuffle which can be represented as a pair of two slides,
@@ -4671,9 +4674,11 @@ static bool isZipEven(const std::array<std::pair<int, int>, 2> &SrcInfo,
/// Note that the operand order is swapped due to the way we canonicalize
/// the slides, so SrcInfo[0] is vs1, and SrcInfo[1] is vs2.
static bool isZipOdd(const std::array<std::pair<int, int>, 2> &SrcInfo,
- ArrayRef<int> Mask) {
- return SrcInfo[0].second == 0 && SrcInfo[1].second == -1 &&
- isAlternating(SrcInfo, Mask, false);
+ ArrayRef<int> Mask, unsigned &Factor) {
+ Factor = -SrcInfo[1].second;
+ return SrcInfo[0].second == 0 && isPowerOf2_32(Factor) &&
+ Mask.size() % Factor == 0 &&
+ isAlternating(SrcInfo, Mask, Factor, false);
}
// Lower a deinterleave shuffle to SRL and TRUNC. Factor must be
@@ -5760,16 +5765,33 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
return convertFromScalableVector(VT, Res, DAG, Subtarget);
}
- if (Subtarget.hasVendorXRivosVizip() && isZipEven(SrcInfo, Mask)) {
- SDValue Src1 = SrcInfo[0].first == 0 ? V1 : V2;
- SDValue Src2 = SrcInfo[1].first == 0 ? V1 : V2;
- return lowerVZIP(RISCVISD::RI_VZIPEVEN_VL, Src1, Src2, DL, DAG,
- Subtarget);
- }
- if (Subtarget.hasVendorXRivosVizip() && isZipOdd(SrcInfo, Mask)) {
- SDValue Src1 = SrcInfo[1].first == 0 ? V1 : V2;
- SDValue Src2 = SrcInfo[0].first == 0 ? V1 : V2;
- return lowerVZIP(RISCVISD::RI_VZIPODD_VL, Src1, Src2, DL, DAG, Subtarget);
+ if (Subtarget.hasVendorXRivosVizip()) {
+ bool TryWiden = false;
+ unsigned Factor;
+ if (isZipEven(SrcInfo, Mask, Factor)) {
+ if (Factor == 1) {
+ SDValue Src1 = SrcInfo[0].first == 0 ? V1 : V2;
+ SDValue Src2 = SrcInfo[1].first == 0 ? V1 : V2;
+ return lowerVZIP(RISCVISD::RI_VZIPEVEN_VL, Src1, Src2, DL, DAG,
+ Subtarget);
+ }
+ TryWiden = true;
+ }
+ if (isZipOdd(SrcInfo, Mask, Factor)) {
+ if (Factor == 1) {
+ SDValue Src1 = SrcInfo[1].first == 0 ? V1 : V2;
+ SDValue Src2 = SrcInfo[0].first == 0 ? V1 : V2;
+ return lowerVZIP(RISCVISD::RI_VZIPODD_VL, Src1, Src2, DL, DAG,
+ Subtarget);
+ }
+ TryWiden = true;
+ }
+ // If we found a widening opportunity which would let us form a
+ // zipeven or zipodd, use the generic code to widen the shuffle
+ // and recurse through this logic.
+ if (TryWiden)
+ if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
+ return V;
}
// Build the mask. Note that vslideup unconditionally preserves elements
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-zipeven-zipodd.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-zipeven-zipodd.ll
index 4e08112ef09b6..14c17a65c6a0f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-zipeven-zipodd.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-zipeven-zipodd.ll
@@ -365,10 +365,9 @@ define <8 x i32> @zipeven_v8i32_as_v4i64(<8 x i32> %v1, <8 x i32> %v2) {
;
; ZIP-LABEL: zipeven_v8i32_as_v4i64:
; ZIP: # %bb.0:
-; ZIP-NEXT: li a0, 204
-; ZIP-NEXT: vsetivli zero, 8, e32, m2, ta, mu
-; ZIP-NEXT: vmv.s.x v0, a0
-; ZIP-NEXT: vslideup.vi v8, v10, 2, v0.t
+; ZIP-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; ZIP-NEXT: ri.vzipeven.vv v12, v8, v10
+; ZIP-NEXT: vmv.v.v v8, v12
; ZIP-NEXT: ret
%out = shufflevector <8 x i32> %v1, <8 x i32> %v2, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 12, i32 13>
ret <8 x i32> %out
@@ -386,11 +385,9 @@ define <8 x i32> @zipodd_v8i32_as_v4i64(<8 x i32> %v1, <8 x i32> %v2) {
;
; ZIP-LABEL: zipodd_v8i32_as_v4i64:
; ZIP: # %bb.0:
-; ZIP-NEXT: li a0, 51
-; ZIP-NEXT: vsetivli zero, 8, e32, m2, ta, mu
-; ZIP-NEXT: vmv.s.x v0, a0
-; ZIP-NEXT: vslidedown.vi v10, v8, 2, v0.t
-; ZIP-NEXT: vmv.v.v v8, v10
+; ZIP-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; ZIP-NEXT: ri.vzipodd.vv v12, v8, v10
+; ZIP-NEXT: vmv.v.v v8, v12
; ZIP-NEXT: ret
%out = shufflevector <8 x i32> %v1, <8 x i32> %v2, <8 x i32> <i32 2, i32 3, i32 10, i32 11, i32 6, i32 7, i32 14, i32 15>
ret <8 x i32> %out
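For context on the widen-and-recurse step: a hypothetical sketch of what the generic widening does to the mask above. widenShuffleMask is an invented name standing in for tryWidenMaskForShuffle, and the real helper also tolerates a single undef lane within a pair; this shows only the core pairing rule.

#include <cstdio>
#include <vector>

// Returns true and fills WideMask if every adjacent pair of shuffle-mask
// lanes can be folded into one lane at twice the SEW.
static bool widenShuffleMask(const std::vector<int> &Mask,
                             std::vector<int> &WideMask) {
  if (Mask.size() % 2 != 0)
    return false;
  WideMask.clear();
  for (size_t i = 0; i < Mask.size(); i += 2) {
    int Lo = Mask[i], Hi = Mask[i + 1];
    if (Lo == -1 && Hi == -1) { // fully-undef pair stays undef
      WideMask.push_back(-1);
      continue;
    }
    // Both lanes must name an even-aligned, consecutive element pair.
    if (Lo < 0 || Hi != Lo + 1 || (Lo % 2) != 0)
      return false;
    WideMask.push_back(Lo / 2);
  }
  return true;
}

int main() {
  // The zipeven_v8i32_as_v4i64 mask from the test above.
  std::vector<int> Mask = {0, 1, 8, 9, 4, 5, 12, 13}, Wide;
  if (widenShuffleMask(Mask, Wide))
    for (int M : Wide)
      printf("%d ", M); // prints: 0 4 2 6
  printf("\n");
}

Widened to <0, 4, 2, 6> over <4 x i64>, the mask is the classic zipeven form the existing Factor == 1 path matches, which is why recursing after widening lands on ri.vzipeven.vv.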