[llvm] [DAGCombiner] Fold pattern for srl-shl-zext (PR #138290)
Alexander Peskov via llvm-commits
llvm-commits at lists.llvm.org
Mon May 12 06:47:46 PDT 2025
https://github.com/apeskov updated https://github.com/llvm/llvm-project/pull/138290
>From eaf60108adb95a71a6f46a2dbc8439f4527dc080 Mon Sep 17 00:00:00 2001
From: Alexander Peskov <apeskov at nvidia.com>
Date: Mon, 14 Apr 2025 18:05:53 +0400
Subject: [PATCH 01/10] [DAG] Fold pattern for SRL
fold (srl (or x, (shl (zext y), c1)), c1) -> (or (srl x, c1), (zext y))
for c1 <= leadingzeros(zext(y))
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 33 +++++++++++++++
llvm/test/CodeGen/NVPTX/shift-opt.ll | 40 +++++++++++++++++++
2 files changed, 73 insertions(+)
create mode 100644 llvm/test/CodeGen/NVPTX/shift-opt.ll
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 09c6218b3dfd9..2922c83c26497 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10972,6 +10972,39 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
return DAG.getNode(ISD::SRL, DL, VT, N0, NewOp1);
}
+ // fold (srl (or x, (shl (zext y), c1)), c1) -> (or (srl x, c1), (zext y))
+ // c1 <= leadingzeros(zext(y))
+ if (N1C && (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND ||
+ N0.getOpcode() == ISD::XOR)) {
+ SDValue lhs = N0.getOperand(0);
+ SDValue rhs = N0.getOperand(1);
+ SDValue shl;
+ SDValue other;
+ if (lhs.getOpcode() == ISD::SHL) {
+ shl = lhs;
+ other = rhs;
+ } else if (rhs.getOpcode() == ISD::SHL) {
+ shl = rhs;
+ other = lhs;
+ }
+ if (shl.getNode()) {
+ if (shl.getOperand(1).getNode() == N1C) {
+ SDValue zext = shl.getOperand(0);
+ if (zext.getOpcode() == ISD::ZERO_EXTEND) {
+ unsigned numLeadingZeros =
+ zext.getValueType().getSizeInBits() -
+ zext.getOperand(0).getValueType().getSizeInBits();
+ if (N1C->getZExtValue() <= numLeadingZeros) {
+ return DAG.getNode(
+ N0.getOpcode(), SDLoc(N0), VT,
+ DAG.getNode(ISD::SRL, SDLoc(N0), VT, other, SDValue(N1C, 0)),
+ zext);
+ }
+ }
+ }
+ }
+ }
+
// fold operands of srl based on knowledge that the low bits are not
// demanded.
if (SimplifyDemandedBits(SDValue(N, 0)))
diff --git a/llvm/test/CodeGen/NVPTX/shift-opt.ll b/llvm/test/CodeGen/NVPTX/shift-opt.ll
new file mode 100644
index 0000000000000..6686e8d840c6b
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/shift-opt.ll
@@ -0,0 +1,40 @@
+; RUN: llc < %s -mtriple=nvptx64 | FileCheck %s
+
+define i64 @test1(i64 %x, i32 %y) {
+;
+; srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
+; c1 <= leadingzeros(zext(y))
+;
+; CHECK-LABEL: test1
+; CHECK: ld.param.u64 %[[X:rd[0-9]+]], [test1_param_0];
+; CHECK: ld.param.u32 %[[Y:rd[0-9]+]], [test1_param_1];
+; CHECK: shr.u64 %[[SHR:rd[0-9]+]], %[[X]], 5;
+; CHECK: or.b64 %[[OR:rd[0-9]+]], %[[SHR]], %[[Y]];
+; CHECK: st.param.b64 [func_retval0], %[[OR]];
+;
+ %ext = zext i32 %y to i64
+ %shl = shl i64 %ext, 5
+ %or = or i64 %x, %shl
+ %srl = lshr i64 %or, 5
+ ret i64 %srl
+}
+
+define i64 @test2(i64 %x, i32 %y) {
+;
+; srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
+; c1 > leadingzeros(zext(y)).
+;
+; CHECK-LABEL: test2
+; CHECK: ld.param.u64 %[[X:rd[0-9]+]], [test2_param_0];
+; CHECK: ld.param.u32 %[[Y:rd[0-9]+]], [test2_param_1];
+; CHECK: shl.b64 %[[SHL:rd[0-9]+]], %[[Y]], 33;
+; CHECK: or.b64 %[[OR:rd[0-9]+]], %[[X]], %[[SHL]];
+; CHECK: shr.u64 %[[SHR:rd[0-9]+]], %[[OR]], 33;
+; CHECK: st.param.b64 [func_retval0], %[[SHR]];
+;
+ %ext = zext i32 %y to i64
+ %shl = shl i64 %ext, 33
+ %or = or i64 %x, %shl
+ %srl = lshr i64 %or, 33
+ ret i64 %srl
+}
>From e2896c83dccefa31c5fde3a205967e29ce46c33b Mon Sep 17 00:00:00 2001
From: Alexander Peskov <apeskov at nvidia.com>
Date: Tue, 6 May 2025 17:21:34 +0400
Subject: [PATCH 02/10] comments + refactoring
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 27 ++++----
llvm/test/CodeGen/NVPTX/shift-opt.ll | 69 +++++++++++++++++--
2 files changed, 75 insertions(+), 21 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 2922c83c26497..8392767a2e575 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10974,8 +10974,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
// fold (srl (or x, (shl (zext y), c1)), c1) -> (or (srl x, c1), (zext y))
// c1 <= leadingzeros(zext(y))
- if (N1C && (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND ||
- N0.getOpcode() == ISD::XOR)) {
+ if (N1C && ISD::isBitwiseLogicOp(N0.getOpcode())) {
SDValue lhs = N0.getOperand(0);
SDValue rhs = N0.getOperand(1);
SDValue shl;
@@ -10987,19 +10986,17 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
shl = rhs;
other = lhs;
}
- if (shl.getNode()) {
- if (shl.getOperand(1).getNode() == N1C) {
- SDValue zext = shl.getOperand(0);
- if (zext.getOpcode() == ISD::ZERO_EXTEND) {
- unsigned numLeadingZeros =
- zext.getValueType().getSizeInBits() -
- zext.getOperand(0).getValueType().getSizeInBits();
- if (N1C->getZExtValue() <= numLeadingZeros) {
- return DAG.getNode(
- N0.getOpcode(), SDLoc(N0), VT,
- DAG.getNode(ISD::SRL, SDLoc(N0), VT, other, SDValue(N1C, 0)),
- zext);
- }
+ if (shl && shl.getOperand(1) == N1) {
+ SDValue zext = shl.getOperand(0);
+ if (zext.getOpcode() == ISD::ZERO_EXTEND) {
+ unsigned numLeadingZeros =
+ zext.getValueType().getScalarSizeInBits() -
+ zext.getOperand(0).getValueType().getScalarSizeInBits();
+ if (N1C->getZExtValue() <= numLeadingZeros) {
+ return DAG.getNode(
+ N0.getOpcode(), SDLoc(N0), VT,
+ DAG.getNode(ISD::SRL, SDLoc(N0), VT, other, SDValue(N1C, 0)),
+ zext);
}
}
}
diff --git a/llvm/test/CodeGen/NVPTX/shift-opt.ll b/llvm/test/CodeGen/NVPTX/shift-opt.ll
index 6686e8d840c6b..26850a30d2321 100644
--- a/llvm/test/CodeGen/NVPTX/shift-opt.ll
+++ b/llvm/test/CodeGen/NVPTX/shift-opt.ll
@@ -1,16 +1,16 @@
; RUN: llc < %s -mtriple=nvptx64 | FileCheck %s
-define i64 @test1(i64 %x, i32 %y) {
+define i64 @test_or(i64 %x, i32 %y) {
;
; srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
; c1 <= leadingzeros(zext(y))
;
-; CHECK-LABEL: test1
-; CHECK: ld.param.u64 %[[X:rd[0-9]+]], [test1_param_0];
-; CHECK: ld.param.u32 %[[Y:rd[0-9]+]], [test1_param_1];
+; CHECK-LABEL: test_or
+; CHECK: ld.param.u64 %[[X:rd[0-9]+]], [test_or_param_0];
+; CHECK: ld.param.u32 %[[Y:rd[0-9]+]], [test_or_param_1];
; CHECK: shr.u64 %[[SHR:rd[0-9]+]], %[[X]], 5;
-; CHECK: or.b64 %[[OR:rd[0-9]+]], %[[SHR]], %[[Y]];
-; CHECK: st.param.b64 [func_retval0], %[[OR]];
+; CHECK: or.b64 %[[LOP:rd[0-9]+]], %[[SHR]], %[[Y]];
+; CHECK: st.param.b64 [func_retval0], %[[LOP]];
;
%ext = zext i32 %y to i64
%shl = shl i64 %ext, 5
@@ -19,6 +19,63 @@ define i64 @test1(i64 %x, i32 %y) {
ret i64 %srl
}
+define i64 @test_xor(i64 %x, i32 %y) {
+;
+; srl (xor (x, shl(zext(y),c1)),c1) -> xor(srl(x,c1), zext(y))
+; c1 <= leadingzeros(zext(y))
+;
+; CHECK-LABEL: test_xor
+; CHECK: ld.param.u64 %[[X:rd[0-9]+]], [test_xor_param_0];
+; CHECK: ld.param.u32 %[[Y:rd[0-9]+]], [test_xor_param_1];
+; CHECK: shr.u64 %[[SHR:rd[0-9]+]], %[[X]], 5;
+; CHECK: xor.b64 %[[LOP:rd[0-9]+]], %[[SHR]], %[[Y]];
+; CHECK: st.param.b64 [func_retval0], %[[LOP]];
+;
+ %ext = zext i32 %y to i64
+ %shl = shl i64 %ext, 5
+ %or = xor i64 %x, %shl
+ %srl = lshr i64 %or, 5
+ ret i64 %srl
+}
+
+define i64 @test_and(i64 %x, i32 %y) {
+;
+; srl (and (x, shl(zext(y),c1)),c1) -> and(srl(x,c1), zext(y))
+; c1 <= leadingzeros(zext(y))
+;
+; CHECK-LABEL: test_and
+; CHECK: ld.param.u64 %[[X:rd[0-9]+]], [test_and_param_0];
+; CHECK: ld.param.u32 %[[Y:rd[0-9]+]], [test_and_param_1];
+; CHECK: shr.u64 %[[SHR:rd[0-9]+]], %[[X]], 5;
+; CHECK: and.b64 %[[LOP:rd[0-9]+]], %[[SHR]], %[[Y]];
+; CHECK: st.param.b64 [func_retval0], %[[LOP]];
+;
+ %ext = zext i32 %y to i64
+ %shl = shl i64 %ext, 5
+ %or = and i64 %x, %shl
+ %srl = lshr i64 %or, 5
+ ret i64 %srl
+}
+
+define <2 x i64> @test_or_vec(<2 x i64> %x, <2 x i32> %y) {
+;
+; srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
+; c1 <= leadingzeros(zext(y))
+;
+; CHECK-LABEL: test_or
+; CHECK: ld.param.u64 %[[X:rd[0-9]+]], [test_or_param_0];
+; CHECK: ld.param.u32 %[[Y:rd[0-9]+]], [test_or_param_1];
+; CHECK: shr.u64 %[[SHR:rd[0-9]+]], %[[X]], 5;
+; CHECK: or.b64 %[[LOP:rd[0-9]+]], %[[SHR]], %[[Y]];
+; CHECK: st.param.b64 [func_retval0], %[[LOP]];
+;
+ %ext = zext <2 x i32> %y to <2 x i64>
+ %shl = shl <2 x i64> %ext, splat(i64 5)
+ %or = or <2 x i64> %x, %shl
+ %srl = lshr <2 x i64> %or, splat(i64 5)
+ ret <2 x i64> %srl
+}
+
define i64 @test2(i64 %x, i32 %y) {
;
; srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
>From d6a319bc1e323c1c92ee096eb0a99766d795316a Mon Sep 17 00:00:00 2001
From: Alexander Peskov <apeskov at nvidia.com>
Date: Tue, 6 May 2025 19:26:19 +0400
Subject: [PATCH 03/10] support of vectors
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 5 +--
llvm/test/CodeGen/NVPTX/shift-opt.ll | 36 ++++++++++---------
2 files changed, 23 insertions(+), 18 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 8392767a2e575..c4494534c85bf 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10972,7 +10972,8 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
return DAG.getNode(ISD::SRL, DL, VT, N0, NewOp1);
}
- // fold (srl (or x, (shl (zext y), c1)), c1) -> (or (srl x, c1), (zext y))
+ // fold (srl (logic_op x, (shl (zext y), c1)), c1)
+ // -> (logic_op (srl x, c1), (zext y))
// c1 <= leadingzeros(zext(y))
if (N1C && ISD::isBitwiseLogicOp(N0.getOpcode())) {
SDValue lhs = N0.getOperand(0);
@@ -10995,7 +10996,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
if (N1C->getZExtValue() <= numLeadingZeros) {
return DAG.getNode(
N0.getOpcode(), SDLoc(N0), VT,
- DAG.getNode(ISD::SRL, SDLoc(N0), VT, other, SDValue(N1C, 0)),
+ DAG.getNode(ISD::SRL, SDLoc(N0), VT, other, N1),
zext);
}
}
diff --git a/llvm/test/CodeGen/NVPTX/shift-opt.ll b/llvm/test/CodeGen/NVPTX/shift-opt.ll
index 26850a30d2321..ef19a0d03bb9e 100644
--- a/llvm/test/CodeGen/NVPTX/shift-opt.ll
+++ b/llvm/test/CodeGen/NVPTX/shift-opt.ll
@@ -57,33 +57,37 @@ define i64 @test_and(i64 %x, i32 %y) {
ret i64 %srl
}
-define <2 x i64> @test_or_vec(<2 x i64> %x, <2 x i32> %y) {
+define <2 x i16> @test_vec(<2 x i16> %x, <2 x i8> %y) {
;
; srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
; c1 <= leadingzeros(zext(y))
;
-; CHECK-LABEL: test_or
-; CHECK: ld.param.u64 %[[X:rd[0-9]+]], [test_or_param_0];
-; CHECK: ld.param.u32 %[[Y:rd[0-9]+]], [test_or_param_1];
-; CHECK: shr.u64 %[[SHR:rd[0-9]+]], %[[X]], 5;
-; CHECK: or.b64 %[[LOP:rd[0-9]+]], %[[SHR]], %[[Y]];
-; CHECK: st.param.b64 [func_retval0], %[[LOP]];
+; CHECK-LABEL: test_vec
+; CHECK: ld.param.u32 %[[X:r[0-9]+]], [test_vec_param_0];
+; CHECK: ld.param.u32 %[[P1:r[0-9]+]], [test_vec_param_1];
+; CHECK: and.b32 %[[Y:r[0-9]+]], %[[P1]], 16711935;
+; CHECK: mov.b32 {%[[X1:rs[0-9]+]], %[[X2:rs[0-9]+]]}, %[[X]];
+; CHECK: shr.u16 %[[SHR2:rs[0-9]+]], %[[X2]], 5;
+; CHECK: shr.u16 %[[SHR1:rs[0-9]+]], %[[X1]], 5;
+; CHECK: mov.b32 %[[SHR:r[0-9]+]], {%[[SHR1]], %[[SHR2]]};
+; CHECK: or.b32 %[[LOP:r[0-9]+]], %[[SHR]], %[[Y]];
+; CHECK: st.param.b32 [func_retval0], %[[LOP]];
;
- %ext = zext <2 x i32> %y to <2 x i64>
- %shl = shl <2 x i64> %ext, splat(i64 5)
- %or = or <2 x i64> %x, %shl
- %srl = lshr <2 x i64> %or, splat(i64 5)
- ret <2 x i64> %srl
+ %ext = zext <2 x i8> %y to <2 x i16>
+ %shl = shl <2 x i16> %ext, splat(i16 5)
+ %or = or <2 x i16> %x, %shl
+ %srl = lshr <2 x i16> %or, splat(i16 5)
+ ret <2 x i16> %srl
}
-define i64 @test2(i64 %x, i32 %y) {
+define i64 @test_negative_c(i64 %x, i32 %y) {
;
; srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
; c1 > leadingzeros(zext(y)).
;
-; CHECK-LABEL: test2
-; CHECK: ld.param.u64 %[[X:rd[0-9]+]], [test2_param_0];
-; CHECK: ld.param.u32 %[[Y:rd[0-9]+]], [test2_param_1];
+; CHECK-LABEL: test_negative_c
+; CHECK: ld.param.u64 %[[X:rd[0-9]+]], [test_negative_c_param_0];
+; CHECK: ld.param.u32 %[[Y:rd[0-9]+]], [test_negative_c_param_1];
; CHECK: shl.b64 %[[SHL:rd[0-9]+]], %[[Y]], 33;
; CHECK: or.b64 %[[OR:rd[0-9]+]], %[[X]], %[[SHL]];
; CHECK: shr.u64 %[[SHR:rd[0-9]+]], %[[OR]], 33;
>From 71db1d9206d98654ace3e43c52e12c76288bdd08 Mon Sep 17 00:00:00 2001
From: Alexander Peskov <apeskov at nvidia.com>
Date: Tue, 6 May 2025 19:55:07 +0400
Subject: [PATCH 04/10] negative case
---
llvm/test/CodeGen/NVPTX/shift-opt.ll | 26 ++++++++++++++++++++++++++
1 file changed, 26 insertions(+)
diff --git a/llvm/test/CodeGen/NVPTX/shift-opt.ll b/llvm/test/CodeGen/NVPTX/shift-opt.ll
index ef19a0d03bb9e..437eda4670dc7 100644
--- a/llvm/test/CodeGen/NVPTX/shift-opt.ll
+++ b/llvm/test/CodeGen/NVPTX/shift-opt.ll
@@ -61,6 +61,7 @@ define <2 x i16> @test_vec(<2 x i16> %x, <2 x i8> %y) {
;
; srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
; c1 <= leadingzeros(zext(y))
+; x, y - vectors
;
; CHECK-LABEL: test_vec
; CHECK: ld.param.u32 %[[X:r[0-9]+]], [test_vec_param_0];
@@ -99,3 +100,28 @@ define i64 @test_negative_c(i64 %x, i32 %y) {
%srl = lshr i64 %or, 33
ret i64 %srl
}
+
+declare void @use(i64)
+
+define i64 @test_negative_use_lop(i64 %x, i32 %y) {
+;
+; srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
+; c1 <= leadingzeros(zext(y))
+;
+;
+;
+; CHECK-LABEL: test_negative_use_lop
+; CHECK: ld.param.u64 %[[X:rd[0-9]+]], [test_negative_c_param_0];
+; CHECK: ld.param.u32 %[[Y:rd[0-9]+]], [test_negative_c_param_1];
+; CHECK: shl.b64 %[[SHL:rd[0-9]+]], %[[Y]], 33;
+; CHECK: or.b64 %[[OR:rd[0-9]+]], %[[X]], %[[SHL]];
+; CHECK: shr.u64 %[[SHR:rd[0-9]+]], %[[OR]], 33;
+; CHECK: st.param.b64 [func_retval0], %[[SHR]];
+;
+ %ext = zext i32 %y to i64
+ %shl = shl i64 %ext, 5
+ %or = or i64 %x, %shl
+ %srl = lshr i64 %or, 5
+ call void @use(i64 %or)
+ ret i64 %srl
+}
\ No newline at end of file
>From 3b31baf3526860ddb7456e89dcf3f1a0a3e11e64 Mon Sep 17 00:00:00 2001
From: Alexander Peskov <apeskov at nvidia.com>
Date: Tue, 6 May 2025 20:31:05 +0400
Subject: [PATCH 05/10] negative tests. Multiple usage
Signed-off-by: Alexander Peskov <apeskov at nvidia.com>
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 4 +-
llvm/test/CodeGen/NVPTX/shift-opt.ll | 42 +++++++++++++++----
2 files changed, 37 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index c4494534c85bf..7c3c9b513b17f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10975,7 +10975,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
// fold (srl (logic_op x, (shl (zext y), c1)), c1)
// -> (logic_op (srl x, c1), (zext y))
// c1 <= leadingzeros(zext(y))
- if (N1C && ISD::isBitwiseLogicOp(N0.getOpcode())) {
+ if (N1C && ISD::isBitwiseLogicOp(N0.getOpcode()) && N0.hasOneUse()) {
SDValue lhs = N0.getOperand(0);
SDValue rhs = N0.getOperand(1);
SDValue shl;
@@ -10987,7 +10987,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
shl = rhs;
other = lhs;
}
- if (shl && shl.getOperand(1) == N1) {
+ if (shl && shl.getOperand(1) == N1 && shl.hasOneUse()) {
SDValue zext = shl.getOperand(0);
if (zext.getOpcode() == ISD::ZERO_EXTEND) {
unsigned numLeadingZeros =
diff --git a/llvm/test/CodeGen/NVPTX/shift-opt.ll b/llvm/test/CodeGen/NVPTX/shift-opt.ll
index 437eda4670dc7..855f8f2f91cbc 100644
--- a/llvm/test/CodeGen/NVPTX/shift-opt.ll
+++ b/llvm/test/CodeGen/NVPTX/shift-opt.ll
@@ -107,15 +107,17 @@ define i64 @test_negative_use_lop(i64 %x, i32 %y) {
;
; srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
; c1 <= leadingzeros(zext(y))
-;
-;
+; multiple usage of "or"
;
; CHECK-LABEL: test_negative_use_lop
-; CHECK: ld.param.u64 %[[X:rd[0-9]+]], [test_negative_c_param_0];
-; CHECK: ld.param.u32 %[[Y:rd[0-9]+]], [test_negative_c_param_1];
-; CHECK: shl.b64 %[[SHL:rd[0-9]+]], %[[Y]], 33;
+; CHECK: ld.param.u64 %[[X:rd[0-9]+]], [test_negative_use_lop_param_0];
+; CHECK: ld.param.u32 %[[Y:r[0-9]+]], [test_negative_use_lop_param_1];
+; CHECK: mul.wide.u32 %[[SHL:rd[0-9]+]], %[[Y]], 32;
; CHECK: or.b64 %[[OR:rd[0-9]+]], %[[X]], %[[SHL]];
-; CHECK: shr.u64 %[[SHR:rd[0-9]+]], %[[OR]], 33;
+; CHECK: shr.u64 %[[SHR:rd[0-9]+]], %[[OR]], 5;
+; CHECK: { // callseq
+; CHECK: st.param.b64 [param0], %[[OR]];
+; CHECK: } // callseq
; CHECK: st.param.b64 [func_retval0], %[[SHR]];
;
%ext = zext i32 %y to i64
@@ -124,4 +126,30 @@ define i64 @test_negative_use_lop(i64 %x, i32 %y) {
%srl = lshr i64 %or, 5
call void @use(i64 %or)
ret i64 %srl
-}
\ No newline at end of file
+}
+
+
+define i64 @test_negative_use_shl(i64 %x, i32 %y) {
+;
+; srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
+; c1 <= leadingzeros(zext(y))
+; multiple usage of "shl"
+;
+; CHECK-LABEL: test_negative_use_shl
+; CHECK: ld.param.u64 %[[X:rd[0-9]+]], [test_negative_use_shl_param_0];
+; CHECK: ld.param.u32 %[[Y:r[0-9]+]], [test_negative_use_shl_param_1];
+; CHECK: mul.wide.u32 %[[SHL:rd[0-9]+]], %[[Y]], 32;
+; CHECK: or.b64 %[[OR:rd[0-9]+]], %[[X]], %[[SHL]];
+; CHECK: shr.u64 %[[SHR:rd[0-9]+]], %[[OR]], 5;
+; CHECK: { // callseq
+; CHECK: st.param.b64 [param0], %[[SHL]];
+; CHECK: } // callseq
+; CHECK: st.param.b64 [func_retval0], %[[SHR]];
+;
+ %ext = zext i32 %y to i64
+ %shl = shl i64 %ext, 5
+ %or = or i64 %x, %shl
+ %srl = lshr i64 %or, 5
+ call void @use(i64 %shl)
+ ret i64 %srl
+}
>From 9b44a065bdc58b4bd7c6165fa5ea50fe5b81cb46 Mon Sep 17 00:00:00 2001
From: Alexander Peskov <apeskov at nvidia.com>
Date: Tue, 6 May 2025 22:00:37 +0400
Subject: [PATCH 06/10] Capitalize variables
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 40 +++++++++----------
1 file changed, 20 insertions(+), 20 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 7c3c9b513b17f..b1b5c53352572 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10976,28 +10976,28 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
// -> (logic_op (srl x, c1), (zext y))
// c1 <= leadingzeros(zext(y))
if (N1C && ISD::isBitwiseLogicOp(N0.getOpcode()) && N0.hasOneUse()) {
- SDValue lhs = N0.getOperand(0);
- SDValue rhs = N0.getOperand(1);
- SDValue shl;
- SDValue other;
- if (lhs.getOpcode() == ISD::SHL) {
- shl = lhs;
- other = rhs;
- } else if (rhs.getOpcode() == ISD::SHL) {
- shl = rhs;
- other = lhs;
- }
- if (shl && shl.getOperand(1) == N1 && shl.hasOneUse()) {
- SDValue zext = shl.getOperand(0);
- if (zext.getOpcode() == ISD::ZERO_EXTEND) {
- unsigned numLeadingZeros =
- zext.getValueType().getScalarSizeInBits() -
- zext.getOperand(0).getValueType().getScalarSizeInBits();
- if (N1C->getZExtValue() <= numLeadingZeros) {
+ SDValue LHS = N0.getOperand(0);
+ SDValue RHS = N0.getOperand(1);
+ SDValue SHL;
+ SDValue Other;
+ if (LHS.getOpcode() == ISD::SHL) {
+ SHL = LHS;
+ Other = RHS;
+ } else if (RHS.getOpcode() == ISD::SHL) {
+ SHL = RHS;
+ Other = LHS;
+ }
+ if (SHL && SHL.getOperand(1) == N1 && SHL.hasOneUse()) {
+ SDValue ZExt = SHL.getOperand(0);
+ if (ZExt.getOpcode() == ISD::ZERO_EXTEND) {
+ unsigned NumLeadingZeros =
+ ZExt.getValueType().getScalarSizeInBits() -
+ ZExt.getOperand(0).getValueType().getScalarSizeInBits();
+ if (N1C->getZExtValue() <= NumLeadingZeros) {
return DAG.getNode(
N0.getOpcode(), SDLoc(N0), VT,
- DAG.getNode(ISD::SRL, SDLoc(N0), VT, other, N1),
- zext);
+ DAG.getNode(ISD::SRL, SDLoc(N0), VT, Other, N1),
+ ZExt);
}
}
}
>From aae650eb718fd0165878a09acae43e70410324fa Mon Sep 17 00:00:00 2001
From: Alexander Peskov <apeskov at nvidia.com>
Date: Tue, 6 May 2025 22:16:10 +0400
Subject: [PATCH 07/10] minor
---
llvm/test/CodeGen/NVPTX/shift-opt.ll | 27 ++++++++++++---------------
1 file changed, 12 insertions(+), 15 deletions(-)
diff --git a/llvm/test/CodeGen/NVPTX/shift-opt.ll b/llvm/test/CodeGen/NVPTX/shift-opt.ll
index 855f8f2f91cbc..010ea7ad5c844 100644
--- a/llvm/test/CodeGen/NVPTX/shift-opt.ll
+++ b/llvm/test/CodeGen/NVPTX/shift-opt.ll
@@ -2,7 +2,7 @@
define i64 @test_or(i64 %x, i32 %y) {
;
-; srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
+; Fold: srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
; c1 <= leadingzeros(zext(y))
;
; CHECK-LABEL: test_or
@@ -21,7 +21,7 @@ define i64 @test_or(i64 %x, i32 %y) {
define i64 @test_xor(i64 %x, i32 %y) {
;
-; srl (xor (x, shl(zext(y),c1)),c1) -> xor(srl(x,c1), zext(y))
+; Fold: srl (xor (x, shl(zext(y),c1)),c1) -> xor(srl(x,c1), zext(y))
; c1 <= leadingzeros(zext(y))
;
; CHECK-LABEL: test_xor
@@ -40,7 +40,7 @@ define i64 @test_xor(i64 %x, i32 %y) {
define i64 @test_and(i64 %x, i32 %y) {
;
-; srl (and (x, shl(zext(y),c1)),c1) -> and(srl(x,c1), zext(y))
+; Fold: srl (and (x, shl(zext(y),c1)),c1) -> and(srl(x,c1), zext(y))
; c1 <= leadingzeros(zext(y))
;
; CHECK-LABEL: test_and
@@ -59,7 +59,7 @@ define i64 @test_and(i64 %x, i32 %y) {
define <2 x i16> @test_vec(<2 x i16> %x, <2 x i8> %y) {
;
-; srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
+; Fold: srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
; c1 <= leadingzeros(zext(y))
; x, y - vectors
;
@@ -83,8 +83,8 @@ define <2 x i16> @test_vec(<2 x i16> %x, <2 x i8> %y) {
define i64 @test_negative_c(i64 %x, i32 %y) {
;
-; srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
-; c1 > leadingzeros(zext(y)).
+; Do not fold: srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
+; Reason: c1 > leadingzeros(zext(y)).
;
; CHECK-LABEL: test_negative_c
; CHECK: ld.param.u64 %[[X:rd[0-9]+]], [test_negative_c_param_0];
@@ -105,9 +105,8 @@ declare void @use(i64)
define i64 @test_negative_use_lop(i64 %x, i32 %y) {
;
-; srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
-; c1 <= leadingzeros(zext(y))
-; multiple usage of "or"
+; Do not fold: srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
+; Reason: multiple usage of "or"
;
; CHECK-LABEL: test_negative_use_lop
; CHECK: ld.param.u64 %[[X:rd[0-9]+]], [test_negative_use_lop_param_0];
@@ -116,7 +115,7 @@ define i64 @test_negative_use_lop(i64 %x, i32 %y) {
; CHECK: or.b64 %[[OR:rd[0-9]+]], %[[X]], %[[SHL]];
; CHECK: shr.u64 %[[SHR:rd[0-9]+]], %[[OR]], 5;
; CHECK: { // callseq
-; CHECK: st.param.b64 [param0], %[[OR]];
+; CHECK: st.param.b64 [param0], %[[OR]];
; CHECK: } // callseq
; CHECK: st.param.b64 [func_retval0], %[[SHR]];
;
@@ -128,12 +127,10 @@ define i64 @test_negative_use_lop(i64 %x, i32 %y) {
ret i64 %srl
}
-
define i64 @test_negative_use_shl(i64 %x, i32 %y) {
;
-; srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
-; c1 <= leadingzeros(zext(y))
-; multiple usage of "shl"
+; Do not fold: srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
+; Reason: multiple usage of "shl"
;
; CHECK-LABEL: test_negative_use_shl
; CHECK: ld.param.u64 %[[X:rd[0-9]+]], [test_negative_use_shl_param_0];
@@ -142,7 +139,7 @@ define i64 @test_negative_use_shl(i64 %x, i32 %y) {
; CHECK: or.b64 %[[OR:rd[0-9]+]], %[[X]], %[[SHL]];
; CHECK: shr.u64 %[[SHR:rd[0-9]+]], %[[OR]], 5;
; CHECK: { // callseq
-; CHECK: st.param.b64 [param0], %[[SHL]];
+; CHECK: st.param.b64 [param0], %[[SHL]];
; CHECK: } // callseq
; CHECK: st.param.b64 [func_retval0], %[[SHR]];
;
>From fc5c9c652fc6deb20c3bc898de42ddd757678171 Mon Sep 17 00:00:00 2001
From: Alexander Peskov <apeskov at nvidia.com>
Date: Tue, 6 May 2025 23:17:07 +0400
Subject: [PATCH 08/10] fix lint
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index b1b5c53352572..8469b295244f8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10994,10 +10994,9 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
ZExt.getValueType().getScalarSizeInBits() -
ZExt.getOperand(0).getValueType().getScalarSizeInBits();
if (N1C->getZExtValue() <= NumLeadingZeros) {
- return DAG.getNode(
- N0.getOpcode(), SDLoc(N0), VT,
- DAG.getNode(ISD::SRL, SDLoc(N0), VT, Other, N1),
- ZExt);
+ return DAG.getNode(N0.getOpcode(), SDLoc(N0), VT,
+ DAG.getNode(ISD::SRL, SDLoc(N0), VT, Other, N1),
+ ZExt);
}
}
}
>From a3ac5114987e1d3d12320c43a0808cdf0434f875 Mon Sep 17 00:00:00 2001
From: Alexander Peskov <apeskov at nvidia.com>
Date: Wed, 7 May 2025 17:53:17 +0400
Subject: [PATCH 09/10] rework with sd_match
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 36 +++++++------------
1 file changed, 12 insertions(+), 24 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 8469b295244f8..29d0bcc777a22 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10975,30 +10975,18 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
// fold (srl (logic_op x, (shl (zext y), c1)), c1)
// -> (logic_op (srl x, c1), (zext y))
// c1 <= leadingzeros(zext(y))
- if (N1C && ISD::isBitwiseLogicOp(N0.getOpcode()) && N0.hasOneUse()) {
- SDValue LHS = N0.getOperand(0);
- SDValue RHS = N0.getOperand(1);
- SDValue SHL;
- SDValue Other;
- if (LHS.getOpcode() == ISD::SHL) {
- SHL = LHS;
- Other = RHS;
- } else if (RHS.getOpcode() == ISD::SHL) {
- SHL = RHS;
- Other = LHS;
- }
- if (SHL && SHL.getOperand(1) == N1 && SHL.hasOneUse()) {
- SDValue ZExt = SHL.getOperand(0);
- if (ZExt.getOpcode() == ISD::ZERO_EXTEND) {
- unsigned NumLeadingZeros =
- ZExt.getValueType().getScalarSizeInBits() -
- ZExt.getOperand(0).getValueType().getScalarSizeInBits();
- if (N1C->getZExtValue() <= NumLeadingZeros) {
- return DAG.getNode(N0.getOpcode(), SDLoc(N0), VT,
- DAG.getNode(ISD::SRL, SDLoc(N0), VT, Other, N1),
- ZExt);
- }
- }
+ SDValue X, ZExtY;
+ if (N1C && sd_match(N0, m_OneUse(m_BitwiseLogic(
+ m_Value(X),
+ m_OneUse(m_Shl(m_AllOf(m_Value(ZExtY),
+ m_Opc(ISD::ZERO_EXTEND)),
+ m_Specific(N1))))))) {
+ unsigned NumLeadingZeros =
+ ZExtY.getValueType().getScalarSizeInBits() -
+ ZExtY.getOperand(0).getValueType().getScalarSizeInBits();
+ if (N1C->getZExtValue() <= NumLeadingZeros) {
+ return DAG.getNode(N0.getOpcode(), SDLoc(N0), VT,
+ DAG.getNode(ISD::SRL, SDLoc(N0), VT, X, N1), ZExtY);
}
}
>From f6a6f1b25fd12aed3c6b9a422b026865332476b2 Mon Sep 17 00:00:00 2001
From: Alexander Peskov <apeskov at nvidia.com>
Date: Mon, 12 May 2025 17:18:54 +0400
Subject: [PATCH 10/10] minor. style
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 8 +++-----
1 file changed, 3 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 29d0bcc777a22..6f27eae68d647 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10981,13 +10981,11 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
m_OneUse(m_Shl(m_AllOf(m_Value(ZExtY),
m_Opc(ISD::ZERO_EXTEND)),
m_Specific(N1))))))) {
- unsigned NumLeadingZeros =
- ZExtY.getValueType().getScalarSizeInBits() -
- ZExtY.getOperand(0).getValueType().getScalarSizeInBits();
- if (N1C->getZExtValue() <= NumLeadingZeros) {
+ unsigned NumLeadingZeros = ZExtY.getScalarValueSizeInBits() -
+ ZExtY.getOperand(0).getScalarValueSizeInBits();
+ if (N1C->getZExtValue() <= NumLeadingZeros)
return DAG.getNode(N0.getOpcode(), SDLoc(N0), VT,
DAG.getNode(ISD::SRL, SDLoc(N0), VT, X, N1), ZExtY);
- }
}
// fold operands of srl based on knowledge that the low bits are not
More information about the llvm-commits
mailing list