[llvm] [DAGCombiner] Fold pattern for srl-shl-zext (PR #138290)
Alexander Peskov via llvm-commits
llvm-commits at lists.llvm.org
Tue May 6 12:17:35 PDT 2025
https://github.com/apeskov updated https://github.com/llvm/llvm-project/pull/138290
>From 539724b02af3f4755e2db1ad45c9ddb1537445bf Mon Sep 17 00:00:00 2001
From: Alexander Peskov <apeskov at nvidia.com>
Date: Mon, 14 Apr 2025 18:05:53 +0400
Subject: [PATCH 1/8] [DAG] Fold pattern for SRL
fold (srl (or x, (shl (zext y), c1)), c1) -> (or (srl x, c1), (zext y))
for c1 <= leadingzeros(zext(y))
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 33 +++++++++++++++
llvm/test/CodeGen/NVPTX/shift-opt.ll | 40 +++++++++++++++++++
2 files changed, 73 insertions(+)
create mode 100644 llvm/test/CodeGen/NVPTX/shift-opt.ll
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index ea1435c3934be..9ddac013be280 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10979,6 +10979,39 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
return DAG.getNode(ISD::SRL, DL, VT, N0, NewOp1);
}
+ // fold (srl (or x, (shl (zext y), c1)), c1) -> (or (srl x, c1), (zext y))
+ // c1 <= leadingzeros(zext(y))
+ if (N1C && (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND ||
+ N0.getOpcode() == ISD::XOR)) {
+ SDValue lhs = N0.getOperand(0);
+ SDValue rhs = N0.getOperand(1);
+ SDValue shl;
+ SDValue other;
+ if (lhs.getOpcode() == ISD::SHL) {
+ shl = lhs;
+ other = rhs;
+ } else if (rhs.getOpcode() == ISD::SHL) {
+ shl = rhs;
+ other = lhs;
+ }
+ if (shl.getNode()) {
+ if (shl.getOperand(1).getNode() == N1C) {
+ SDValue zext = shl.getOperand(0);
+ if (zext.getOpcode() == ISD::ZERO_EXTEND) {
+ unsigned numLeadingZeros =
+ zext.getValueType().getSizeInBits() -
+ zext.getOperand(0).getValueType().getSizeInBits();
+ if (N1C->getZExtValue() <= numLeadingZeros) {
+ return DAG.getNode(
+ N0.getOpcode(), SDLoc(N0), VT,
+ DAG.getNode(ISD::SRL, SDLoc(N0), VT, other, SDValue(N1C, 0)),
+ zext);
+ }
+ }
+ }
+ }
+ }
+
// fold operands of srl based on knowledge that the low bits are not
// demanded.
if (SimplifyDemandedBits(SDValue(N, 0)))
diff --git a/llvm/test/CodeGen/NVPTX/shift-opt.ll b/llvm/test/CodeGen/NVPTX/shift-opt.ll
new file mode 100644
index 0000000000000..6686e8d840c6b
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/shift-opt.ll
@@ -0,0 +1,40 @@
+; RUN: llc < %s -mtriple=nvptx64 | FileCheck %s
+
+define i64 @test1(i64 %x, i32 %y) {
+;
+; srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
+; c1 <= leadingzeros(zext(y))
+;
+; CHECK-LABEL: test1
+; CHECK: ld.param.u64 %[[X:rd[0-9]+]], [test1_param_0];
+; CHECK: ld.param.u32 %[[Y:rd[0-9]+]], [test1_param_1];
+; CHECK: shr.u64 %[[SHR:rd[0-9]+]], %[[X]], 5;
+; CHECK: or.b64 %[[OR:rd[0-9]+]], %[[SHR]], %[[Y]];
+; CHECK: st.param.b64 [func_retval0], %[[OR]];
+;
+ %ext = zext i32 %y to i64
+ %shl = shl i64 %ext, 5
+ %or = or i64 %x, %shl
+ %srl = lshr i64 %or, 5
+ ret i64 %srl
+}
+
+define i64 @test2(i64 %x, i32 %y) {
+;
+; srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
+; c1 > leadingzeros(zext(y)).
+;
+; CHECK-LABEL: test2
+; CHECK: ld.param.u64 %[[X:rd[0-9]+]], [test2_param_0];
+; CHECK: ld.param.u32 %[[Y:rd[0-9]+]], [test2_param_1];
+; CHECK: shl.b64 %[[SHL:rd[0-9]+]], %[[Y]], 33;
+; CHECK: or.b64 %[[OR:rd[0-9]+]], %[[X]], %[[SHL]];
+; CHECK: shr.u64 %[[SHR:rd[0-9]+]], %[[OR]], 33;
+; CHECK: st.param.b64 [func_retval0], %[[SHR]];
+;
+ %ext = zext i32 %y to i64
+ %shl = shl i64 %ext, 33
+ %or = or i64 %x, %shl
+ %srl = lshr i64 %or, 33
+ ret i64 %srl
+}
>From 07a38274a8806b35a10906fcdf863af596000312 Mon Sep 17 00:00:00 2001
From: Alexander Peskov <apeskov at nvidia.com>
Date: Tue, 6 May 2025 17:21:34 +0400
Subject: [PATCH 2/8] comments + refactoring
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 27 ++++----
llvm/test/CodeGen/NVPTX/shift-opt.ll | 69 +++++++++++++++++--
2 files changed, 75 insertions(+), 21 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 9ddac013be280..4b843c754a976 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10981,8 +10981,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
// fold (srl (or x, (shl (zext y), c1)), c1) -> (or (srl x, c1), (zext y))
// c1 <= leadingzeros(zext(y))
- if (N1C && (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND ||
- N0.getOpcode() == ISD::XOR)) {
+ if (N1C && ISD::isBitwiseLogicOp(N0.getOpcode())) {
SDValue lhs = N0.getOperand(0);
SDValue rhs = N0.getOperand(1);
SDValue shl;
@@ -10994,19 +10993,17 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
shl = rhs;
other = lhs;
}
- if (shl.getNode()) {
- if (shl.getOperand(1).getNode() == N1C) {
- SDValue zext = shl.getOperand(0);
- if (zext.getOpcode() == ISD::ZERO_EXTEND) {
- unsigned numLeadingZeros =
- zext.getValueType().getSizeInBits() -
- zext.getOperand(0).getValueType().getSizeInBits();
- if (N1C->getZExtValue() <= numLeadingZeros) {
- return DAG.getNode(
- N0.getOpcode(), SDLoc(N0), VT,
- DAG.getNode(ISD::SRL, SDLoc(N0), VT, other, SDValue(N1C, 0)),
- zext);
- }
+ if (shl && shl.getOperand(1) == N1) {
+ SDValue zext = shl.getOperand(0);
+ if (zext.getOpcode() == ISD::ZERO_EXTEND) {
+ unsigned numLeadingZeros =
+ zext.getValueType().getScalarSizeInBits() -
+ zext.getOperand(0).getValueType().getScalarSizeInBits();
+ if (N1C->getZExtValue() <= numLeadingZeros) {
+ return DAG.getNode(
+ N0.getOpcode(), SDLoc(N0), VT,
+ DAG.getNode(ISD::SRL, SDLoc(N0), VT, other, SDValue(N1C, 0)),
+ zext);
}
}
}
diff --git a/llvm/test/CodeGen/NVPTX/shift-opt.ll b/llvm/test/CodeGen/NVPTX/shift-opt.ll
index 6686e8d840c6b..26850a30d2321 100644
--- a/llvm/test/CodeGen/NVPTX/shift-opt.ll
+++ b/llvm/test/CodeGen/NVPTX/shift-opt.ll
@@ -1,16 +1,16 @@
; RUN: llc < %s -mtriple=nvptx64 | FileCheck %s
-define i64 @test1(i64 %x, i32 %y) {
+define i64 @test_or(i64 %x, i32 %y) {
;
; srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
; c1 <= leadingzeros(zext(y))
;
-; CHECK-LABEL: test1
-; CHECK: ld.param.u64 %[[X:rd[0-9]+]], [test1_param_0];
-; CHECK: ld.param.u32 %[[Y:rd[0-9]+]], [test1_param_1];
+; CHECK-LABEL: test_or
+; CHECK: ld.param.u64 %[[X:rd[0-9]+]], [test_or_param_0];
+; CHECK: ld.param.u32 %[[Y:rd[0-9]+]], [test_or_param_1];
; CHECK: shr.u64 %[[SHR:rd[0-9]+]], %[[X]], 5;
-; CHECK: or.b64 %[[OR:rd[0-9]+]], %[[SHR]], %[[Y]];
-; CHECK: st.param.b64 [func_retval0], %[[OR]];
+; CHECK: or.b64 %[[LOP:rd[0-9]+]], %[[SHR]], %[[Y]];
+; CHECK: st.param.b64 [func_retval0], %[[LOP]];
;
%ext = zext i32 %y to i64
%shl = shl i64 %ext, 5
@@ -19,6 +19,63 @@ define i64 @test1(i64 %x, i32 %y) {
ret i64 %srl
}
+define i64 @test_xor(i64 %x, i32 %y) {
+;
+; srl (xor (x, shl(zext(y),c1)),c1) -> xor(srl(x,c1), zext(y))
+; c1 <= leadingzeros(zext(y))
+;
+; CHECK-LABEL: test_xor
+; CHECK: ld.param.u64 %[[X:rd[0-9]+]], [test_xor_param_0];
+; CHECK: ld.param.u32 %[[Y:rd[0-9]+]], [test_xor_param_1];
+; CHECK: shr.u64 %[[SHR:rd[0-9]+]], %[[X]], 5;
+; CHECK: xor.b64 %[[LOP:rd[0-9]+]], %[[SHR]], %[[Y]];
+; CHECK: st.param.b64 [func_retval0], %[[LOP]];
+;
+ %ext = zext i32 %y to i64
+ %shl = shl i64 %ext, 5
+ %or = xor i64 %x, %shl
+ %srl = lshr i64 %or, 5
+ ret i64 %srl
+}
+
+define i64 @test_and(i64 %x, i32 %y) {
+;
+; srl (and (x, shl(zext(y),c1)),c1) -> and(srl(x,c1), zext(y))
+; c1 <= leadingzeros(zext(y))
+;
+; CHECK-LABEL: test_and
+; CHECK: ld.param.u64 %[[X:rd[0-9]+]], [test_and_param_0];
+; CHECK: ld.param.u32 %[[Y:rd[0-9]+]], [test_and_param_1];
+; CHECK: shr.u64 %[[SHR:rd[0-9]+]], %[[X]], 5;
+; CHECK: and.b64 %[[LOP:rd[0-9]+]], %[[SHR]], %[[Y]];
+; CHECK: st.param.b64 [func_retval0], %[[LOP]];
+;
+ %ext = zext i32 %y to i64
+ %shl = shl i64 %ext, 5
+ %or = and i64 %x, %shl
+ %srl = lshr i64 %or, 5
+ ret i64 %srl
+}
+
+define <2 x i64> @test_or_vec(<2 x i64> %x, <2 x i32> %y) {
+;
+; srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
+; c1 <= leadingzeros(zext(y))
+;
+; CHECK-LABEL: test_or
+; CHECK: ld.param.u64 %[[X:rd[0-9]+]], [test_or_param_0];
+; CHECK: ld.param.u32 %[[Y:rd[0-9]+]], [test_or_param_1];
+; CHECK: shr.u64 %[[SHR:rd[0-9]+]], %[[X]], 5;
+; CHECK: or.b64 %[[LOP:rd[0-9]+]], %[[SHR]], %[[Y]];
+; CHECK: st.param.b64 [func_retval0], %[[LOP]];
+;
+ %ext = zext <2 x i32> %y to <2 x i64>
+ %shl = shl <2 x i64> %ext, splat(i64 5)
+ %or = or <2 x i64> %x, %shl
+ %srl = lshr <2 x i64> %or, splat(i64 5)
+ ret <2 x i64> %srl
+}
+
define i64 @test2(i64 %x, i32 %y) {
;
; srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
>From bbbc78b85fa6699c3d27a310efa4152d0adfd3cc Mon Sep 17 00:00:00 2001
From: Alexander Peskov <apeskov at nvidia.com>
Date: Tue, 6 May 2025 19:26:19 +0400
Subject: [PATCH 3/8] support of vectors
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 5 +--
llvm/test/CodeGen/NVPTX/shift-opt.ll | 36 ++++++++++---------
2 files changed, 23 insertions(+), 18 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 4b843c754a976..93cc936d8793d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10979,7 +10979,8 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
return DAG.getNode(ISD::SRL, DL, VT, N0, NewOp1);
}
- // fold (srl (or x, (shl (zext y), c1)), c1) -> (or (srl x, c1), (zext y))
+ // fold (srl (logic_op x, (shl (zext y), c1)), c1)
+ // -> (logic_op (srl x, c1), (zext y))
// c1 <= leadingzeros(zext(y))
if (N1C && ISD::isBitwiseLogicOp(N0.getOpcode())) {
SDValue lhs = N0.getOperand(0);
@@ -11002,7 +11003,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
if (N1C->getZExtValue() <= numLeadingZeros) {
return DAG.getNode(
N0.getOpcode(), SDLoc(N0), VT,
- DAG.getNode(ISD::SRL, SDLoc(N0), VT, other, SDValue(N1C, 0)),
+ DAG.getNode(ISD::SRL, SDLoc(N0), VT, other, N1),
zext);
}
}
diff --git a/llvm/test/CodeGen/NVPTX/shift-opt.ll b/llvm/test/CodeGen/NVPTX/shift-opt.ll
index 26850a30d2321..ef19a0d03bb9e 100644
--- a/llvm/test/CodeGen/NVPTX/shift-opt.ll
+++ b/llvm/test/CodeGen/NVPTX/shift-opt.ll
@@ -57,33 +57,37 @@ define i64 @test_and(i64 %x, i32 %y) {
ret i64 %srl
}
-define <2 x i64> @test_or_vec(<2 x i64> %x, <2 x i32> %y) {
+define <2 x i16> @test_vec(<2 x i16> %x, <2 x i8> %y) {
;
; srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
; c1 <= leadingzeros(zext(y))
;
-; CHECK-LABEL: test_or
-; CHECK: ld.param.u64 %[[X:rd[0-9]+]], [test_or_param_0];
-; CHECK: ld.param.u32 %[[Y:rd[0-9]+]], [test_or_param_1];
-; CHECK: shr.u64 %[[SHR:rd[0-9]+]], %[[X]], 5;
-; CHECK: or.b64 %[[LOP:rd[0-9]+]], %[[SHR]], %[[Y]];
-; CHECK: st.param.b64 [func_retval0], %[[LOP]];
+; CHECK-LABEL: test_vec
+; CHECK: ld.param.u32 %[[X:r[0-9]+]], [test_vec_param_0];
+; CHECK: ld.param.u32 %[[P1:r[0-9]+]], [test_vec_param_1];
+; CHECK: and.b32 %[[Y:r[0-9]+]], %[[P1]], 16711935;
+; CHECK: mov.b32 {%[[X1:rs[0-9]+]], %[[X2:rs[0-9]+]]}, %[[X]];
+; CHECK: shr.u16 %[[SHR2:rs[0-9]+]], %[[X2]], 5;
+; CHECK: shr.u16 %[[SHR1:rs[0-9]+]], %[[X1]], 5;
+; CHECK: mov.b32 %[[SHR:r[0-9]+]], {%[[SHR1]], %[[SHR2]]};
+; CHECK: or.b32 %[[LOP:r[0-9]+]], %[[SHR]], %[[Y]];
+; CHECK: st.param.b32 [func_retval0], %[[LOP]];
;
- %ext = zext <2 x i32> %y to <2 x i64>
- %shl = shl <2 x i64> %ext, splat(i64 5)
- %or = or <2 x i64> %x, %shl
- %srl = lshr <2 x i64> %or, splat(i64 5)
- ret <2 x i64> %srl
+ %ext = zext <2 x i8> %y to <2 x i16>
+ %shl = shl <2 x i16> %ext, splat(i16 5)
+ %or = or <2 x i16> %x, %shl
+ %srl = lshr <2 x i16> %or, splat(i16 5)
+ ret <2 x i16> %srl
}
-define i64 @test2(i64 %x, i32 %y) {
+define i64 @test_negative_c(i64 %x, i32 %y) {
;
; srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
; c1 > leadingzeros(zext(y)).
;
-; CHECK-LABEL: test2
-; CHECK: ld.param.u64 %[[X:rd[0-9]+]], [test2_param_0];
-; CHECK: ld.param.u32 %[[Y:rd[0-9]+]], [test2_param_1];
+; CHECK-LABEL: test_negative_c
+; CHECK: ld.param.u64 %[[X:rd[0-9]+]], [test_negative_c_param_0];
+; CHECK: ld.param.u32 %[[Y:rd[0-9]+]], [test_negative_c_param_1];
; CHECK: shl.b64 %[[SHL:rd[0-9]+]], %[[Y]], 33;
; CHECK: or.b64 %[[OR:rd[0-9]+]], %[[X]], %[[SHL]];
; CHECK: shr.u64 %[[SHR:rd[0-9]+]], %[[OR]], 33;
>From c9f07b8e297f8045dcfc0e1a0e0f7d97a85118ab Mon Sep 17 00:00:00 2001
From: Alexander Peskov <apeskov at nvidia.com>
Date: Tue, 6 May 2025 19:55:07 +0400
Subject: [PATCH 4/8] negative case
---
llvm/test/CodeGen/NVPTX/shift-opt.ll | 26 ++++++++++++++++++++++++++
1 file changed, 26 insertions(+)
diff --git a/llvm/test/CodeGen/NVPTX/shift-opt.ll b/llvm/test/CodeGen/NVPTX/shift-opt.ll
index ef19a0d03bb9e..437eda4670dc7 100644
--- a/llvm/test/CodeGen/NVPTX/shift-opt.ll
+++ b/llvm/test/CodeGen/NVPTX/shift-opt.ll
@@ -61,6 +61,7 @@ define <2 x i16> @test_vec(<2 x i16> %x, <2 x i8> %y) {
;
; srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
; c1 <= leadingzeros(zext(y))
+; x, y - vectors
;
; CHECK-LABEL: test_vec
; CHECK: ld.param.u32 %[[X:r[0-9]+]], [test_vec_param_0];
@@ -99,3 +100,28 @@ define i64 @test_negative_c(i64 %x, i32 %y) {
%srl = lshr i64 %or, 33
ret i64 %srl
}
+
+declare void @use(i64)
+
+define i64 @test_negative_use_lop(i64 %x, i32 %y) {
+;
+; srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
+; c1 <= leadingzeros(zext(y))
+;
+;
+;
+; CHECK-LABEL: test_negative_use_lop
+; CHECK: ld.param.u64 %[[X:rd[0-9]+]], [test_negative_c_param_0];
+; CHECK: ld.param.u32 %[[Y:rd[0-9]+]], [test_negative_c_param_1];
+; CHECK: shl.b64 %[[SHL:rd[0-9]+]], %[[Y]], 33;
+; CHECK: or.b64 %[[OR:rd[0-9]+]], %[[X]], %[[SHL]];
+; CHECK: shr.u64 %[[SHR:rd[0-9]+]], %[[OR]], 33;
+; CHECK: st.param.b64 [func_retval0], %[[SHR]];
+;
+ %ext = zext i32 %y to i64
+ %shl = shl i64 %ext, 5
+ %or = or i64 %x, %shl
+ %srl = lshr i64 %or, 5
+ call void @use(i64 %or)
+ ret i64 %srl
+}
\ No newline at end of file
>From ac34287f60bfbc90a9b290de75d965be0e5bc078 Mon Sep 17 00:00:00 2001
From: Alexander Peskov <apeskov at nvidia.com>
Date: Tue, 6 May 2025 20:31:05 +0400
Subject: [PATCH 5/8] negative tests. Multiple usage
Signed-off-by: Alexander Peskov <apeskov at nvidia.com>
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 4 +-
llvm/test/CodeGen/NVPTX/shift-opt.ll | 42 +++++++++++++++----
2 files changed, 37 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 93cc936d8793d..7b29a586bf639 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10982,7 +10982,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
// fold (srl (logic_op x, (shl (zext y), c1)), c1)
// -> (logic_op (srl x, c1), (zext y))
// c1 <= leadingzeros(zext(y))
- if (N1C && ISD::isBitwiseLogicOp(N0.getOpcode())) {
+ if (N1C && ISD::isBitwiseLogicOp(N0.getOpcode()) && N0.hasOneUse()) {
SDValue lhs = N0.getOperand(0);
SDValue rhs = N0.getOperand(1);
SDValue shl;
@@ -10994,7 +10994,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
shl = rhs;
other = lhs;
}
- if (shl && shl.getOperand(1) == N1) {
+ if (shl && shl.getOperand(1) == N1 && shl.hasOneUse()) {
SDValue zext = shl.getOperand(0);
if (zext.getOpcode() == ISD::ZERO_EXTEND) {
unsigned numLeadingZeros =
diff --git a/llvm/test/CodeGen/NVPTX/shift-opt.ll b/llvm/test/CodeGen/NVPTX/shift-opt.ll
index 437eda4670dc7..855f8f2f91cbc 100644
--- a/llvm/test/CodeGen/NVPTX/shift-opt.ll
+++ b/llvm/test/CodeGen/NVPTX/shift-opt.ll
@@ -107,15 +107,17 @@ define i64 @test_negative_use_lop(i64 %x, i32 %y) {
;
; srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
; c1 <= leadingzeros(zext(y))
-;
-;
+; multiple usage of "or"
;
; CHECK-LABEL: test_negative_use_lop
-; CHECK: ld.param.u64 %[[X:rd[0-9]+]], [test_negative_c_param_0];
-; CHECK: ld.param.u32 %[[Y:rd[0-9]+]], [test_negative_c_param_1];
-; CHECK: shl.b64 %[[SHL:rd[0-9]+]], %[[Y]], 33;
+; CHECK: ld.param.u64 %[[X:rd[0-9]+]], [test_negative_use_lop_param_0];
+; CHECK: ld.param.u32 %[[Y:r[0-9]+]], [test_negative_use_lop_param_1];
+; CHECK: mul.wide.u32 %[[SHL:rd[0-9]+]], %[[Y]], 32;
; CHECK: or.b64 %[[OR:rd[0-9]+]], %[[X]], %[[SHL]];
-; CHECK: shr.u64 %[[SHR:rd[0-9]+]], %[[OR]], 33;
+; CHECK: shr.u64 %[[SHR:rd[0-9]+]], %[[OR]], 5;
+; CHECK: { // callseq
+; CHECK: st.param.b64 [param0], %[[OR]];
+; CHECK: } // callseq
; CHECK: st.param.b64 [func_retval0], %[[SHR]];
;
%ext = zext i32 %y to i64
@@ -124,4 +126,30 @@ define i64 @test_negative_use_lop(i64 %x, i32 %y) {
%srl = lshr i64 %or, 5
call void @use(i64 %or)
ret i64 %srl
-}
\ No newline at end of file
+}
+
+
+define i64 @test_negative_use_shl(i64 %x, i32 %y) {
+;
+; srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
+; c1 <= leadingzeros(zext(y))
+; multiple usage of "shl"
+;
+; CHECK-LABEL: test_negative_use_shl
+; CHECK: ld.param.u64 %[[X:rd[0-9]+]], [test_negative_use_shl_param_0];
+; CHECK: ld.param.u32 %[[Y:r[0-9]+]], [test_negative_use_shl_param_1];
+; CHECK: mul.wide.u32 %[[SHL:rd[0-9]+]], %[[Y]], 32;
+; CHECK: or.b64 %[[OR:rd[0-9]+]], %[[X]], %[[SHL]];
+; CHECK: shr.u64 %[[SHR:rd[0-9]+]], %[[OR]], 5;
+; CHECK: { // callseq
+; CHECK: st.param.b64 [param0], %[[SHL]];
+; CHECK: } // callseq
+; CHECK: st.param.b64 [func_retval0], %[[SHR]];
+;
+ %ext = zext i32 %y to i64
+ %shl = shl i64 %ext, 5
+ %or = or i64 %x, %shl
+ %srl = lshr i64 %or, 5
+ call void @use(i64 %shl)
+ ret i64 %srl
+}
>From 83865a5157e9356c2c9bc654bcf499c518660039 Mon Sep 17 00:00:00 2001
From: Alexander Peskov <apeskov at nvidia.com>
Date: Tue, 6 May 2025 22:00:37 +0400
Subject: [PATCH 6/8] Capitalize variables
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 40 +++++++++----------
1 file changed, 20 insertions(+), 20 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 7b29a586bf639..f4aa1d8af6b15 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10983,28 +10983,28 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
// -> (logic_op (srl x, c1), (zext y))
// c1 <= leadingzeros(zext(y))
if (N1C && ISD::isBitwiseLogicOp(N0.getOpcode()) && N0.hasOneUse()) {
- SDValue lhs = N0.getOperand(0);
- SDValue rhs = N0.getOperand(1);
- SDValue shl;
- SDValue other;
- if (lhs.getOpcode() == ISD::SHL) {
- shl = lhs;
- other = rhs;
- } else if (rhs.getOpcode() == ISD::SHL) {
- shl = rhs;
- other = lhs;
- }
- if (shl && shl.getOperand(1) == N1 && shl.hasOneUse()) {
- SDValue zext = shl.getOperand(0);
- if (zext.getOpcode() == ISD::ZERO_EXTEND) {
- unsigned numLeadingZeros =
- zext.getValueType().getScalarSizeInBits() -
- zext.getOperand(0).getValueType().getScalarSizeInBits();
- if (N1C->getZExtValue() <= numLeadingZeros) {
+ SDValue LHS = N0.getOperand(0);
+ SDValue RHS = N0.getOperand(1);
+ SDValue SHL;
+ SDValue Other;
+ if (LHS.getOpcode() == ISD::SHL) {
+ SHL = LHS;
+ Other = RHS;
+ } else if (RHS.getOpcode() == ISD::SHL) {
+ SHL = RHS;
+ Other = LHS;
+ }
+ if (SHL && SHL.getOperand(1) == N1 && SHL.hasOneUse()) {
+ SDValue ZExt = SHL.getOperand(0);
+ if (ZExt.getOpcode() == ISD::ZERO_EXTEND) {
+ unsigned NumLeadingZeros =
+ ZExt.getValueType().getScalarSizeInBits() -
+ ZExt.getOperand(0).getValueType().getScalarSizeInBits();
+ if (N1C->getZExtValue() <= NumLeadingZeros) {
return DAG.getNode(
N0.getOpcode(), SDLoc(N0), VT,
- DAG.getNode(ISD::SRL, SDLoc(N0), VT, other, N1),
- zext);
+ DAG.getNode(ISD::SRL, SDLoc(N0), VT, Other, N1),
+ ZExt);
}
}
}
>From 6665d07a34ec2eb9e68fbeb00eed7fe2880ff743 Mon Sep 17 00:00:00 2001
From: Alexander Peskov <apeskov at nvidia.com>
Date: Tue, 6 May 2025 22:16:10 +0400
Subject: [PATCH 7/8] minor
---
llvm/test/CodeGen/NVPTX/shift-opt.ll | 27 ++++++++++++---------------
1 file changed, 12 insertions(+), 15 deletions(-)
diff --git a/llvm/test/CodeGen/NVPTX/shift-opt.ll b/llvm/test/CodeGen/NVPTX/shift-opt.ll
index 855f8f2f91cbc..010ea7ad5c844 100644
--- a/llvm/test/CodeGen/NVPTX/shift-opt.ll
+++ b/llvm/test/CodeGen/NVPTX/shift-opt.ll
@@ -2,7 +2,7 @@
define i64 @test_or(i64 %x, i32 %y) {
;
-; srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
+; Fold: srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
; c1 <= leadingzeros(zext(y))
;
; CHECK-LABEL: test_or
@@ -21,7 +21,7 @@ define i64 @test_or(i64 %x, i32 %y) {
define i64 @test_xor(i64 %x, i32 %y) {
;
-; srl (xor (x, shl(zext(y),c1)),c1) -> xor(srl(x,c1), zext(y))
+; Fold: srl (xor (x, shl(zext(y),c1)),c1) -> xor(srl(x,c1), zext(y))
; c1 <= leadingzeros(zext(y))
;
; CHECK-LABEL: test_xor
@@ -40,7 +40,7 @@ define i64 @test_xor(i64 %x, i32 %y) {
define i64 @test_and(i64 %x, i32 %y) {
;
-; srl (and (x, shl(zext(y),c1)),c1) -> and(srl(x,c1), zext(y))
+; Fold: srl (and (x, shl(zext(y),c1)),c1) -> and(srl(x,c1), zext(y))
; c1 <= leadingzeros(zext(y))
;
; CHECK-LABEL: test_and
@@ -59,7 +59,7 @@ define i64 @test_and(i64 %x, i32 %y) {
define <2 x i16> @test_vec(<2 x i16> %x, <2 x i8> %y) {
;
-; srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
+; Fold: srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
; c1 <= leadingzeros(zext(y))
; x, y - vectors
;
@@ -83,8 +83,8 @@ define <2 x i16> @test_vec(<2 x i16> %x, <2 x i8> %y) {
define i64 @test_negative_c(i64 %x, i32 %y) {
;
-; srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
-; c1 > leadingzeros(zext(y)).
+; Do not fold: srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
+; Reason: c1 > leadingzeros(zext(y)).
;
; CHECK-LABEL: test_negative_c
; CHECK: ld.param.u64 %[[X:rd[0-9]+]], [test_negative_c_param_0];
@@ -105,9 +105,8 @@ declare void @use(i64)
define i64 @test_negative_use_lop(i64 %x, i32 %y) {
;
-; srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
-; c1 <= leadingzeros(zext(y))
-; multiple usage of "or"
+; Do not fold: srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
+; Reason: multiple usage of "or"
;
; CHECK-LABEL: test_negative_use_lop
; CHECK: ld.param.u64 %[[X:rd[0-9]+]], [test_negative_use_lop_param_0];
@@ -116,7 +115,7 @@ define i64 @test_negative_use_lop(i64 %x, i32 %y) {
; CHECK: or.b64 %[[OR:rd[0-9]+]], %[[X]], %[[SHL]];
; CHECK: shr.u64 %[[SHR:rd[0-9]+]], %[[OR]], 5;
; CHECK: { // callseq
-; CHECK: st.param.b64 [param0], %[[OR]];
+; CHECK: st.param.b64 [param0], %[[OR]];
; CHECK: } // callseq
; CHECK: st.param.b64 [func_retval0], %[[SHR]];
;
@@ -128,12 +127,10 @@ define i64 @test_negative_use_lop(i64 %x, i32 %y) {
ret i64 %srl
}
-
define i64 @test_negative_use_shl(i64 %x, i32 %y) {
;
-; srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
-; c1 <= leadingzeros(zext(y))
-; multiple usage of "shl"
+; Do not fold: srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
+; Reason: multiple usage of "shl"
;
; CHECK-LABEL: test_negative_use_shl
; CHECK: ld.param.u64 %[[X:rd[0-9]+]], [test_negative_use_shl_param_0];
@@ -142,7 +139,7 @@ define i64 @test_negative_use_shl(i64 %x, i32 %y) {
; CHECK: or.b64 %[[OR:rd[0-9]+]], %[[X]], %[[SHL]];
; CHECK: shr.u64 %[[SHR:rd[0-9]+]], %[[OR]], 5;
; CHECK: { // callseq
-; CHECK: st.param.b64 [param0], %[[SHL]];
+; CHECK: st.param.b64 [param0], %[[SHL]];
; CHECK: } // callseq
; CHECK: st.param.b64 [func_retval0], %[[SHR]];
;
>From 4b2cdbb7703a35178ef3a1b14dd12fb319a8b9ad Mon Sep 17 00:00:00 2001
From: Alexander Peskov <apeskov at nvidia.com>
Date: Tue, 6 May 2025 23:17:07 +0400
Subject: [PATCH 8/8] fix lint
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index f4aa1d8af6b15..7981a6a9b81db 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -11001,10 +11001,9 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
ZExt.getValueType().getScalarSizeInBits() -
ZExt.getOperand(0).getValueType().getScalarSizeInBits();
if (N1C->getZExtValue() <= NumLeadingZeros) {
- return DAG.getNode(
- N0.getOpcode(), SDLoc(N0), VT,
- DAG.getNode(ISD::SRL, SDLoc(N0), VT, Other, N1),
- ZExt);
+ return DAG.getNode(N0.getOpcode(), SDLoc(N0), VT,
+ DAG.getNode(ISD::SRL, SDLoc(N0), VT, Other, N1),
+ ZExt);
}
}
}
More information about the llvm-commits
mailing list