[llvm] 5aa5a2f - [RISCV] Disable exact VLEN splitting for bitrotate shuffles (#79468)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 25 10:06:19 PST 2024
Author: Philip Reames
Date: 2024-01-25T10:06:14-08:00
New Revision: 5aa5a2f1b77d93a9f9ed158172277ee41ac224a9
URL: https://github.com/llvm/llvm-project/commit/5aa5a2f1b77d93a9f9ed158172277ee41ac224a9
DIFF: https://github.com/llvm/llvm-project/commit/5aa5a2f1b77d93a9f9ed158172277ee41ac224a9.diff
LOG: [RISCV] Disable exact VLEN splitting for bitrotate shuffles (#79468)
If we have a bitrotate shuffle, then by definition it is also a
vreg-splittable shuffle when the exact VLEN is known. However, there's
no profit to be had from splitting the wider bitrotate lowering into
individual m1 pieces; we'd rather leave it at the higher LMUL to reduce
code size.
This is a general problem for any shuffle expansion that is linear in
LMUL, since vreg splitting still does the same linear amount of work,
just spread across the individual pieces. On first reflection it seems
like element rotation might have the same interaction, but in that case
the splitting can be done via a set of whole-register moves (which may
get folded into the consumer, depending on the use), which is at least
as good as a pair of slideup/slidedown. I think bitrotate is the only
shuffle expansion we have that actually needs to be handled here.
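As a concrete illustration (a sketch adapted from the comment in
RISCVISelLowering.cpp below, not a test from this commit), a mask like
the following for v8i8 is recognized as a bitrotate, since it can be
reinterpreted as a v2i32 with each element rotated by 8:

  ; hypothetical snippet; %v stands in for any v8i8 value
  %rot = shufflevector <8 x i8> %v, <8 x i8> poison,
         <8 x i32> <i32 3, i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6>

With Zvkb this becomes a single vror.vi; otherwise it expands to vsll,
vsrl, and vor.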
Added:
Modified:
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-rotate.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index d38c8f2088d773..b883a9b6070ee5 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -4616,26 +4616,38 @@ static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN,
return Res;
}
-// Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can
-// reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower this
-// as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor.
-static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN,
- SelectionDAG &DAG,
- const RISCVSubtarget &Subtarget) {
+static bool isLegalBitRotate(ShuffleVectorSDNode *SVN,
+ SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget,
+ MVT &RotateVT, unsigned &RotateAmt) {
SDLoc DL(SVN);
EVT VT = SVN->getValueType(0);
unsigned NumElts = VT.getVectorNumElements();
unsigned EltSizeInBits = VT.getScalarSizeInBits();
- unsigned NumSubElts, RotateAmt;
+ unsigned NumSubElts;
if (!ShuffleVectorInst::isBitRotateMask(SVN->getMask(), EltSizeInBits, 2,
NumElts, NumSubElts, RotateAmt))
- return SDValue();
- MVT RotateVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits * NumSubElts),
+ return false;
+ RotateVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits * NumSubElts),
NumElts / NumSubElts);
// We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x.
- if (!Subtarget.getTargetLowering()->isTypeLegal(RotateVT))
+ return Subtarget.getTargetLowering()->isTypeLegal(RotateVT);
+}
+
+// Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can
+// reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower this
+// as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor.
+static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN,
+ SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ SDLoc DL(SVN);
+
+ EVT VT = SVN->getValueType(0);
+ unsigned RotateAmt;
+ MVT RotateVT;
+ if (!isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt))
return SDValue();
SDValue Op = DAG.getBitcast(RotateVT, SVN->getOperand(0));
@@ -4672,6 +4684,13 @@ static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN,
if (MinVLen != MaxVLen || VT.getSizeInBits().getFixedValue() <= MinVLen)
return SDValue();
+ // Avoid picking up bitrotate patterns which we have a linear-in-lmul
+ // expansion for.
+ unsigned RotateAmt;
+ MVT RotateVT;
+ if (isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt))
+ return SDValue();
+
MVT ElemVT = VT.getVectorElementType();
unsigned ElemsPerVReg = MinVLen / ElemVT.getFixedSizeInBits();
unsigned VRegsPerSrc = NumElts / ElemsPerVReg;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-rotate.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-rotate.ll
index cd84844438933d..4c84cf350c40ef 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-rotate.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-rotate.ll
@@ -858,37 +858,30 @@ define <8 x float> @shuffle_v8f32_as_i64_exact(<8 x float> %v) vscale_range(2,2)
; RV32-LABEL: shuffle_v8f32_as_i64_exact:
; RV32: # %bb.0:
; RV32-NEXT: li a0, 32
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vmv.v.x v10, a0
-; RV32-NEXT: vrsub.vi v11, v10, 0
; RV32-NEXT: li a0, 63
-; RV32-NEXT: vand.vx v11, v11, a0
-; RV32-NEXT: vsrl.vv v12, v8, v11
+; RV32-NEXT: vand.vx v12, v10, a0
+; RV32-NEXT: vsll.vv v12, v8, v12
+; RV32-NEXT: vrsub.vi v10, v10, 0
; RV32-NEXT: vand.vx v10, v10, a0
-; RV32-NEXT: vsll.vv v8, v8, v10
-; RV32-NEXT: vor.vv v8, v8, v12
-; RV32-NEXT: vsrl.vv v11, v9, v11
-; RV32-NEXT: vsll.vv v9, v9, v10
-; RV32-NEXT: vor.vv v9, v9, v11
+; RV32-NEXT: vsrl.vv v8, v8, v10
+; RV32-NEXT: vor.vv v8, v12, v8
; RV32-NEXT: ret
;
; RV64-LABEL: shuffle_v8f32_as_i64_exact:
; RV64: # %bb.0:
; RV64-NEXT: li a0, 32
-; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT: vsrl.vx v10, v8, a0
; RV64-NEXT: vsll.vx v8, v8, a0
; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vsrl.vx v10, v9, a0
-; RV64-NEXT: vsll.vx v9, v9, a0
-; RV64-NEXT: vor.vv v9, v9, v10
; RV64-NEXT: ret
;
; ZVKB-V-LABEL: shuffle_v8f32_as_i64_exact:
; ZVKB-V: # %bb.0:
-; ZVKB-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; ZVKB-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; ZVKB-V-NEXT: vror.vi v8, v8, 32
-; ZVKB-V-NEXT: vror.vi v9, v9, 32
; ZVKB-V-NEXT: ret
;
; ZVKB-ZVE32X-LABEL: shuffle_v8f32_as_i64_exact:
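For context, the test updated above is shuffle_v8f32_as_i64_exact with
vscale_range(2,2), i.e. the exact VLEN is known. A sketch of the
corresponding IR (only the signature appears in the diff header; the
mask below is an assumption consistent with the test name, not copied
from the test file): swapping each adjacent pair of floats is a 32-bit
rotate of each i64 lane, so after this patch it lowers as a single m2
rotate instead of being split into m1 pieces:

define <8 x float> @shuffle_v8f32_as_i64_exact(<8 x float> %v) vscale_range(2,2) {
  ; mask <1,0,3,2,5,4,7,6> swaps the two f32 halves of every i64 lane
  ; (assumed body; not verbatim from the test file)
  %shuffle = shufflevector <8 x float> %v, <8 x float> poison,
             <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
  ret <8 x float> %shuffle
}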