[llvm] 0c4023a - [RISCV] Use Root instead of N throughout the worklist loop in combineBinOp_VLToVWBinOp_VL. (#99416)

Thu Jul 18 08:47:09 PDT 2024

Author: Craig Topper
Date: 2024-07-18T08:47:06-07:00
New Revision: 0c4023ae3b64c54ff51947e9776aee0e963c5635

URL: https://github.com/llvm/llvm-project/commit/0c4023ae3b64c54ff51947e9776aee0e963c5635
DIFF: https://github.com/llvm/llvm-project/commit/0c4023ae3b64c54ff51947e9776aee0e963c5635.diff

LOG: [RISCV] Use Root instead of N throughout the worklist loop in combineBinOp_VLToVWBinOp_VL. (#99416)

We were only checking that the node from the worklist is a supported
root. We weren't checking the strategy or any of its operands unless it
was the original node. For any other node, we just rechecked the
original node's strategy and operands.

The effect of this was that we didn't do all of the transformations at
once. Instead, when there were multiple possible nodes to transform, we
would only do them as each node was visited by the main DAG combine
worklist.

The test shows a case where we widened an instruction without removing
all of the uses of the vsext. The sext is shared by two other nodes: one
that shares another sext node with the root, and another that doesn't
share anything with the root.

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVISelLowering.cpp
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 21193ebe1eb94..e938454b8e642 100644

--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -14998,8 +14998,8 @@ static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N,
     if (!NodeExtensionHelper::isSupportedRoot(Root, Subtarget))
       return SDValue();
 
-    NodeExtensionHelper LHS(N, 0, DAG, Subtarget);
-    NodeExtensionHelper RHS(N, 1, DAG, Subtarget);
+    NodeExtensionHelper LHS(Root, 0, DAG, Subtarget);
+    NodeExtensionHelper RHS(Root, 1, DAG, Subtarget);
     auto AppendUsersIfNeeded = [&Worklist,
                                 &Inserted](const NodeExtensionHelper &Op) {
       if (Op.needToPromoteOtherUsers()) {
@@ -15016,18 +15016,18 @@ static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N,
       return SDValue();
 
     SmallVector<NodeExtensionHelper::CombineToTry> FoldingStrategies =
-        NodeExtensionHelper::getSupportedFoldings(N);
+        NodeExtensionHelper::getSupportedFoldings(Root);
 
     assert(!FoldingStrategies.empty() && "Nothing to be folded");
     bool Matched = false;
     for (int Attempt = 0;
-         (Attempt != 1 + NodeExtensionHelper::isCommutative(N)) && !Matched;
+         (Attempt != 1 + NodeExtensionHelper::isCommutative(Root)) && !Matched;
          ++Attempt) {
 
       for (NodeExtensionHelper::CombineToTry FoldingStrategy :
            FoldingStrategies) {
         std::optional<CombineResult> Res =
-            FoldingStrategy(N, LHS, RHS, DAG, Subtarget);
+            FoldingStrategy(Root, LHS, RHS, DAG, Subtarget);
         if (Res) {
           Matched = true;
           CombinesToApply.push_back(*Res);

diff  --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll
index 9d63b8f31a3e8..feb0178569bc7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll
@@ -882,3 +882,41 @@ define <2 x i64> @vwmul_vx_v2i64_i64(ptr %x, ptr %y) {
   %g = mul <2 x i64> %e, %f
   ret <2 x i64> %g
 }
+
+define <2 x i16> @vwmul_v2i16_multiuse(ptr %x, ptr %y, ptr %z, ptr %w) {
+; CHECK-LABEL: vwmul_v2i16_multiuse:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT:    vle8.v v8, (a1)
+; CHECK-NEXT:    vle8.v v9, (a2)
+; CHECK-NEXT:    vle8.v v10, (a3)
+; CHECK-NEXT:    vle8.v v11, (a0)
+; CHECK-NEXT:    vsext.vf2 v12, v8
+; CHECK-NEXT:    vsext.vf2 v8, v9
+; CHECK-NEXT:    vsext.vf2 v9, v10
+; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
+; CHECK-NEXT:    vwmul.vv v13, v11, v10
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vmul.vv v9, v12, v9
+; CHECK-NEXT:    vdivu.vv v8, v12, v8
+; CHECK-NEXT:    vor.vv v9, v13, v9
+; CHECK-NEXT:    vor.vv v8, v9, v8
+; CHECK-NEXT:    ret
+  %a = load <2 x i8>, ptr %x
+  %b = load <2 x i8>, ptr %y
+  %c = load <2 x i8>, ptr %z
+  %d = load <2 x i8>, ptr %w
+
+  %as = sext <2 x i8> %a to <2 x i16>
+  %bs = sext <2 x i8> %b to <2 x i16>
+  %cs = sext <2 x i8> %c to <2 x i16>
+  %ds = sext <2 x i8> %d to <2 x i16>
+
+  %e = mul <2 x i16> %as, %ds
+  %f = mul <2 x i16> %bs, %ds ; shares 1 use with %e
+  %g = udiv <2 x i16> %bs, %cs ; shares 1 use with %f, and no uses with %e
+
+  %h = or <2 x i16> %e, %f
+  %i = or <2 x i16> %h, %g
+  ret <2 x i16> %i
+}