[llvm] [DAGCombiner] Fold patterm for srl-shl-zext (PR #138290)
via llvm-commits
llvm-commits at lists.llvm.org
Fri May 2 07:57:46 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-nvptx
Author: Alexander Peskov (apeskov)
<details>
<summary>Changes</summary>
Fold `(srl (lop x, (shl (zext y), c1)), c1) -> (lop (srl x, c1), (zext y))` where c1 <= leadingzeros(zext(y)).
This is equivalent of existing fold chain `(srl (shl (zext y), c1), c1) -> (and (zext y), mask) -> (zext y)`, but logical op in the middle prevents it from combining.
Profit : Allow to reduce the number of instructions.
---
Full diff: https://github.com/llvm/llvm-project/pull/138290.diff
2 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+33)
- (added) llvm/test/CodeGen/NVPTX/shift-opt.ll (+40)
``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index ea1435c3934be..9ddac013be280 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10979,6 +10979,39 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
return DAG.getNode(ISD::SRL, DL, VT, N0, NewOp1);
}
+ // fold (srl (or x, (shl (zext y), c1)), c1) -> (or (srl x, c1), (zext y))
+ // c1 <= leadingzeros(zext(y))
+ if (N1C && (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND ||
+ N0.getOpcode() == ISD::XOR)) {
+ SDValue lhs = N0.getOperand(0);
+ SDValue rhs = N0.getOperand(1);
+ SDValue shl;
+ SDValue other;
+ if (lhs.getOpcode() == ISD::SHL) {
+ shl = lhs;
+ other = rhs;
+ } else if (rhs.getOpcode() == ISD::SHL) {
+ shl = rhs;
+ other = lhs;
+ }
+ if (shl.getNode()) {
+ if (shl.getOperand(1).getNode() == N1C) {
+ SDValue zext = shl.getOperand(0);
+ if (zext.getOpcode() == ISD::ZERO_EXTEND) {
+ unsigned numLeadingZeros =
+ zext.getValueType().getSizeInBits() -
+ zext.getOperand(0).getValueType().getSizeInBits();
+ if (N1C->getZExtValue() <= numLeadingZeros) {
+ return DAG.getNode(
+ N0.getOpcode(), SDLoc(N0), VT,
+ DAG.getNode(ISD::SRL, SDLoc(N0), VT, other, SDValue(N1C, 0)),
+ zext);
+ }
+ }
+ }
+ }
+ }
+
// fold operands of srl based on knowledge that the low bits are not
// demanded.
if (SimplifyDemandedBits(SDValue(N, 0)))
diff --git a/llvm/test/CodeGen/NVPTX/shift-opt.ll b/llvm/test/CodeGen/NVPTX/shift-opt.ll
new file mode 100644
index 0000000000000..6686e8d840c6b
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/shift-opt.ll
@@ -0,0 +1,40 @@
+; RUN: llc < %s -mtriple=nvptx64 | FileCheck %s
+
+define i64 @test1(i64 %x, i32 %y) {
+;
+; srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
+; c1 <= leadingzeros(zext(y))
+;
+; CHECK-LABEL: test1
+; CHECK: ld.param.u64 %[[X:rd[0-9]+]], [test1_param_0];
+; CHECK: ld.param.u32 %[[Y:rd[0-9]+]], [test1_param_1];
+; CHECK: shr.u64 %[[SHR:rd[0-9]+]], %[[X]], 5;
+; CHECK: or.b64 %[[OR:rd[0-9]+]], %[[SHR]], %[[Y]];
+; CHECK: st.param.b64 [func_retval0], %[[OR]];
+;
+ %ext = zext i32 %y to i64
+ %shl = shl i64 %ext, 5
+ %or = or i64 %x, %shl
+ %srl = lshr i64 %or, 5
+ ret i64 %srl
+}
+
+define i64 @test2(i64 %x, i32 %y) {
+;
+; srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
+; c1 > leadingzeros(zext(y)).
+;
+; CHECK-LABEL: test2
+; CHECK: ld.param.u64 %[[X:rd[0-9]+]], [test2_param_0];
+; CHECK: ld.param.u32 %[[Y:rd[0-9]+]], [test2_param_1];
+; CHECK: shl.b64 %[[SHL:rd[0-9]+]], %[[Y]], 33;
+; CHECK: or.b64 %[[OR:rd[0-9]+]], %[[X]], %[[SHL]];
+; CHECK: shr.u64 %[[SHR:rd[0-9]+]], %[[OR]], 33;
+; CHECK: st.param.b64 [func_retval0], %[[SHR]];
+;
+ %ext = zext i32 %y to i64
+ %shl = shl i64 %ext, 33
+ %or = or i64 %x, %shl
+ %srl = lshr i64 %or, 33
+ ret i64 %srl
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/138290
More information about the llvm-commits
mailing list