[PATCH] D133739: [RISCV][WIP] Form more VW instructions

Mon Sep 12 17:27:15 PDT 2022

qcolombet created this revision.
qcolombet added reviewers: craig.topper, dcaballe.
Herald added subscribers: sunshaoce, VincentWu, luke957, StephenFan, vkmr, frasercrmck, evandro, luismarques, apazos, sameer.abuasal, s.egerton, Jim, benna, psnobl, jocewei, PkmX, the_o, brucehoult, MartinMosbeck, rogfer01, edward-jones, zzheng, jrtc27, shiva0217, kito-cheng, niosHD, sabuasal, simoncook, johnrusso, rbar, asb, hiraditya, arichardson.
Herald added a project: All.
qcolombet requested review of this revision.
Herald added subscribers: pcwang-thead, eopXD, MaskRay.
Herald added a project: LLVM.

This patch aims at starting a conversation about how people think we should approach forming VW variants (operations that widen their input arguments) more aggressively.

Currently we fold sign/zero extensions in instructions that support widening only when the result of the extension is used only once.
The current (WIP) patch lifts this limitation by checking whether all the users of the extension support the folding and by allowing the transformation when that's the case.

The patch is far from being perfect because it doesn't actually check that the folding will happen for all the instructions (and in true SDISel fashion will be defeated by basic block boundaries) but demonstrates what could be achieved, codegen-wise, with the added test:

  --- old_codegen.s       2022-09-13 00:12:48.989575265 +0000
  +++ new_codegen.s       2022-09-13 00:13:02.134793836 +0000
  @@ -16,30 +16,28 @@
   .Lfunc_end0:
          .size   vwmul_v2i16, .Lfunc_end0-vwmul_v2i16
          .cfi_endproc
                                           # -- End function
          .globl  vwmul_v2i16_multiple_users      # -- Begin function vwmul_v2i16_multiple_users
          .p2align        2
          .type   vwmul_v2i16_multiple_users,@function
   vwmul_v2i16_multiple_users:             # @vwmul_v2i16_multiple_users
          .cfi_startproc
   # %bb.0:
  -       vsetivli        zero, 2, e16, mf4, ta, mu
  +       vsetivli        zero, 2, e8, mf8, ta, mu
          vle8.v  v8, (a0)
          vle8.v  v9, (a1)
          vle8.v  v10, (a2)
  -       vsext.vf2       v11, v8
  -       vsext.vf2       v8, v9
  -       vsext.vf2       v9, v10
  -       vmul.vv v8, v11, v8
  -       vmul.vv v9, v11, v9
  -       vor.vv  v8, v8, v9
  +       vwmul.vv        v11, v8, v9
  +       vwmul.vv        v9, v8, v10
  +       vsetvli zero, zero, e16, mf4, ta, mu
  +       vor.vv  v8, v11, v9

@craig.topper How do you think we should approach forming VW instructions?


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D133739

Files:
  llvm/lib/Target/RISCV/RISCVISelLowering.cpp
  llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll


Index: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll
===================================================================

--- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll
+++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll
@@ -18,6 +18,30 @@
   ret <2 x i16> %e
 }
 
+define <2 x i16> @vwmul_v2i16_multiple_users(<2 x i8>* %x, <2 x i8>* %y, <2 x i8> *%z) {
+; CHECK-LABEL: vwmul_v2i16_multiple_users:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, mu
+; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    vle8.v v9, (a1)
+; CHECK-NEXT:    vle8.v v10, (a2)
+; CHECK-NEXT:    vwmul.vv v11, v8, v9
+; CHECK-NEXT:    vwmul.vv v9, v8, v10
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
+; CHECK-NEXT:    vor.vv v8, v11, v9
+; CHECK-NEXT:    ret
+  %a = load <2 x i8>, <2 x i8>* %x
+  %b = load <2 x i8>, <2 x i8>* %y
+  %b2 = load <2 x i8>, <2 x i8>* %z
+  %c = sext <2 x i8> %a to <2 x i16>
+  %d = sext <2 x i8> %b to <2 x i16>
+  %d2 = sext <2 x i8> %b2 to <2 x i16>
+  %e = mul <2 x i16> %c, %d
+  %f = mul <2 x i16> %c, %d2
+  %g = or <2 x i16> %e, %f
+  ret <2 x i16> %g
+}
+
 define <4 x i16> @vwmul_v4i16(<4 x i8>* %x, <4 x i8>* %y) {
 ; CHECK-LABEL: vwmul_v4i16:
 ; CHECK:       # %bb.0:
Index: llvm/lib/Target/RISCV/RISCVISelLowering.cpp
===================================================================
--- llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -8570,6 +8570,25 @@
   return SDValue();
 }
 
+/// Check whether \p N supports doing sign/zero extension of its arguments.
+bool canFoldExtensionInOpcode(const SDNode *N) {
+  switch (N->getOpcode()) {
+  case RISCVISD::ADD_VL:
+  case RISCVISD::SUB_VL:
+  case RISCVISD::MUL_VL:
+    return true;
+  default:
+    return false;
+  }
+}
+
+/// Check if all the users of \p Val support sign/zero extending their
+/// arguments.
+bool canFoldExtensionInAllUsers(SDValue Val) {
+  return std::all_of(Val->use_begin(), Val->use_end(),
+                     canFoldExtensionInOpcode);
+}
+
 // Try to form vwadd(u).wv/wx or vwsub(u).wv/wx. It might later be optimized to
 // vwadd(u).vv/vx or vwsub(u).vv/vx.
 static SDValue combineADDSUB_VLToVWADDSUB_VL(SDNode *N, SelectionDAG &DAG,
@@ -8599,7 +8618,8 @@
   // If the RHS is a sext or zext, we can form a widening op.
   if ((Op1.getOpcode() == RISCVISD::VZEXT_VL ||
        Op1.getOpcode() == RISCVISD::VSEXT_VL) &&
-      Op1.hasOneUse() && Op1.getOperand(1) == Mask && Op1.getOperand(2) == VL) {
+      canFoldExtensionInAllUsers(Op1) && Op1.getOperand(1) == Mask &&
+      Op1.getOperand(2) == VL) {
     unsigned ExtOpc = Op1.getOpcode();
     Op1 = Op1.getOperand(0);
     // Re-introduce narrower extends if needed.
@@ -8709,7 +8729,7 @@
   bool IsSignExt = Op0.getOpcode() == RISCVISD::VSEXT_VL;
   bool IsZeroExt = Op0.getOpcode() == RISCVISD::VZEXT_VL;
   bool IsVWMULSU = IsSignExt && Op1.getOpcode() == RISCVISD::VZEXT_VL;
-  if ((!IsSignExt && !IsZeroExt) || !Op0.hasOneUse())
+  if ((!IsSignExt && !IsZeroExt) || !canFoldExtensionInAllUsers(Op0))
     return SDValue();
 
   SDValue Merge = N->getOperand(2);
@@ -8731,7 +8751,7 @@
 
   // See if the other operand is the same opcode.
   if (IsVWMULSU || Op0.getOpcode() == Op1.getOpcode()) {
-    if (!Op1.hasOneUse())
+    if (!canFoldExtensionInAllUsers(Op1))
       return SDValue();
 
     // Make sure the mask and VL match.


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D133739.459595.patch
Type: text/x-patch
Size: 3397 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20220913/40d344f4/attachment.bin>