[llvm] [DAGCombiner] Fold patterm for srl-shl-zext (PR #138290)

Fri May 2 07:57:46 PDT 2025

llvmbot wrote:




@llvm/pr-subscribers-backend-nvptx

Author: Alexander Peskov (apeskov)

<details>
<summary>Changes</summary>

Fold `(srl (lop x, (shl (zext y), c1)), c1) -> (lop (srl x, c1), (zext y))` where c1 <= leadingzeros(zext(y)).

This is equivalent of existing fold chain `(srl (shl (zext y), c1), c1) -> (and (zext y), mask) -> (zext y)`, but logical op in the middle prevents it from combining.

Profit : Allow to reduce the number of instructions.

---
Full diff: https://github.com/llvm/llvm-project/pull/138290.diff


2 Files Affected:

- (modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+33) 
- (added) llvm/test/CodeGen/NVPTX/shift-opt.ll (+40) 


``````````diff

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index ea1435c3934be..9ddac013be280 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10979,6 +10979,39 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
       return DAG.getNode(ISD::SRL, DL, VT, N0, NewOp1);
   }
 
+  // fold (srl (or x, (shl (zext y), c1)), c1) -> (or (srl x, c1), (zext y))
+  // c1 <= leadingzeros(zext(y))
+  if (N1C && (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND ||
+              N0.getOpcode() == ISD::XOR)) {
+    SDValue lhs = N0.getOperand(0);
+    SDValue rhs = N0.getOperand(1);
+    SDValue shl;
+    SDValue other;
+    if (lhs.getOpcode() == ISD::SHL) {
+      shl = lhs;
+      other = rhs;
+    } else if (rhs.getOpcode() == ISD::SHL) {
+      shl = rhs;
+      other = lhs;
+    }
+    if (shl.getNode()) {
+      if (shl.getOperand(1).getNode() == N1C) {
+        SDValue zext = shl.getOperand(0);
+        if (zext.getOpcode() == ISD::ZERO_EXTEND) {
+          unsigned numLeadingZeros =
+              zext.getValueType().getSizeInBits() -
+              zext.getOperand(0).getValueType().getSizeInBits();
+          if (N1C->getZExtValue() <= numLeadingZeros) {
+            return DAG.getNode(
+                N0.getOpcode(), SDLoc(N0), VT,
+                DAG.getNode(ISD::SRL, SDLoc(N0), VT, other, SDValue(N1C, 0)),
+                zext);
+          }
+        }
+      }
+    }
+  }
+
   // fold operands of srl based on knowledge that the low bits are not
   // demanded.
   if (SimplifyDemandedBits(SDValue(N, 0)))
diff --git a/llvm/test/CodeGen/NVPTX/shift-opt.ll b/llvm/test/CodeGen/NVPTX/shift-opt.ll
new file mode 100644
index 0000000000000..6686e8d840c6b
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/shift-opt.ll
@@ -0,0 +1,40 @@
+; RUN: llc < %s -mtriple=nvptx64 | FileCheck %s
+
+define i64 @test1(i64 %x, i32 %y) {
+;
+; srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
+; c1 <= leadingzeros(zext(y))
+;
+; CHECK-LABEL: test1
+; CHECK: ld.param.u64 %[[X:rd[0-9]+]], [test1_param_0];
+; CHECK: ld.param.u32 %[[Y:rd[0-9]+]], [test1_param_1];
+; CHECK: shr.u64      %[[SHR:rd[0-9]+]], %[[X]], 5;
+; CHECK: or.b64       %[[OR:rd[0-9]+]], %[[SHR]], %[[Y]];
+; CHECK: st.param.b64 [func_retval0], %[[OR]];
+;
+  %ext = zext i32 %y to i64
+  %shl = shl i64 %ext, 5
+  %or = or i64 %x, %shl
+  %srl = lshr i64 %or, 5
+  ret i64 %srl
+}
+
+define i64 @test2(i64 %x, i32 %y) {
+;
+; srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
+; c1 > leadingzeros(zext(y)).
+;
+; CHECK-LABEL: test2
+; CHECK: ld.param.u64 %[[X:rd[0-9]+]], [test2_param_0];
+; CHECK: ld.param.u32 %[[Y:rd[0-9]+]], [test2_param_1];
+; CHECK: shl.b64      %[[SHL:rd[0-9]+]], %[[Y]], 33;
+; CHECK: or.b64       %[[OR:rd[0-9]+]], %[[X]], %[[SHL]];
+; CHECK: shr.u64      %[[SHR:rd[0-9]+]], %[[OR]], 33;
+; CHECK: st.param.b64 [func_retval0], %[[SHR]];
+;
+  %ext = zext i32 %y to i64
+  %shl = shl i64 %ext, 33
+  %or = or i64 %x, %shl
+  %srl = lshr i64 %or, 33
+  ret i64 %srl
+}

``````````

</details>


https://github.com/llvm/llvm-project/pull/138290