[llvm] [DAGCombiner] Fold pattern for srl-shl-zext (PR #138290)

Alexander Peskov via llvm-commits llvm-commits at lists.llvm.org
Tue May 6 12:17:35 PDT 2025


https://github.com/apeskov updated https://github.com/llvm/llvm-project/pull/138290

>From 539724b02af3f4755e2db1ad45c9ddb1537445bf Mon Sep 17 00:00:00 2001
From: Alexander Peskov <apeskov at nvidia.com>
Date: Mon, 14 Apr 2025 18:05:53 +0400
Subject: [PATCH 1/8] [DAG] Fold pattern for SRL

fold (srl (or x, (shl (zext y), c1)), c1) -> (or (srl x, c1), (zext y))
for c1 <= leadingzeros(zext(y))
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 33 +++++++++++++++
 llvm/test/CodeGen/NVPTX/shift-opt.ll          | 40 +++++++++++++++++++
 2 files changed, 73 insertions(+)
 create mode 100644 llvm/test/CodeGen/NVPTX/shift-opt.ll

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index ea1435c3934be..9ddac013be280 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10979,6 +10979,39 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
       return DAG.getNode(ISD::SRL, DL, VT, N0, NewOp1);
   }
 
+  // fold (srl (or x, (shl (zext y), c1)), c1) -> (or (srl x, c1), (zext y))
+  // c1 <= leadingzeros(zext(y))
+  if (N1C && (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND ||
+              N0.getOpcode() == ISD::XOR)) {
+    SDValue lhs = N0.getOperand(0);
+    SDValue rhs = N0.getOperand(1);
+    SDValue shl;
+    SDValue other;
+    if (lhs.getOpcode() == ISD::SHL) {
+      shl = lhs;
+      other = rhs;
+    } else if (rhs.getOpcode() == ISD::SHL) {
+      shl = rhs;
+      other = lhs;
+    }
+    if (shl.getNode()) {
+      if (shl.getOperand(1).getNode() == N1C) {
+        SDValue zext = shl.getOperand(0);
+        if (zext.getOpcode() == ISD::ZERO_EXTEND) {
+          unsigned numLeadingZeros =
+              zext.getValueType().getSizeInBits() -
+              zext.getOperand(0).getValueType().getSizeInBits();
+          if (N1C->getZExtValue() <= numLeadingZeros) {
+            return DAG.getNode(
+                N0.getOpcode(), SDLoc(N0), VT,
+                DAG.getNode(ISD::SRL, SDLoc(N0), VT, other, SDValue(N1C, 0)),
+                zext);
+          }
+        }
+      }
+    }
+  }
+
   // fold operands of srl based on knowledge that the low bits are not
   // demanded.
   if (SimplifyDemandedBits(SDValue(N, 0)))
diff --git a/llvm/test/CodeGen/NVPTX/shift-opt.ll b/llvm/test/CodeGen/NVPTX/shift-opt.ll
new file mode 100644
index 0000000000000..6686e8d840c6b
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/shift-opt.ll
@@ -0,0 +1,40 @@
+; RUN: llc < %s -mtriple=nvptx64 | FileCheck %s
+
+define i64 @test1(i64 %x, i32 %y) {
+;
+; srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
+; c1 <= leadingzeros(zext(y))
+;
+; CHECK-LABEL: test1
+; CHECK: ld.param.u64 %[[X:rd[0-9]+]], [test1_param_0];
+; CHECK: ld.param.u32 %[[Y:rd[0-9]+]], [test1_param_1];
+; CHECK: shr.u64      %[[SHR:rd[0-9]+]], %[[X]], 5;
+; CHECK: or.b64       %[[OR:rd[0-9]+]], %[[SHR]], %[[Y]];
+; CHECK: st.param.b64 [func_retval0], %[[OR]];
+;
+  %ext = zext i32 %y to i64
+  %shl = shl i64 %ext, 5
+  %or = or i64 %x, %shl
+  %srl = lshr i64 %or, 5
+  ret i64 %srl
+}
+
+define i64 @test2(i64 %x, i32 %y) {
+;
+; srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
+; c1 > leadingzeros(zext(y)).
+;
+; CHECK-LABEL: test2
+; CHECK: ld.param.u64 %[[X:rd[0-9]+]], [test2_param_0];
+; CHECK: ld.param.u32 %[[Y:rd[0-9]+]], [test2_param_1];
+; CHECK: shl.b64      %[[SHL:rd[0-9]+]], %[[Y]], 33;
+; CHECK: or.b64       %[[OR:rd[0-9]+]], %[[X]], %[[SHL]];
+; CHECK: shr.u64      %[[SHR:rd[0-9]+]], %[[OR]], 33;
+; CHECK: st.param.b64 [func_retval0], %[[SHR]];
+;
+  %ext = zext i32 %y to i64
+  %shl = shl i64 %ext, 33
+  %or = or i64 %x, %shl
+  %srl = lshr i64 %or, 33
+  ret i64 %srl
+}

>From 07a38274a8806b35a10906fcdf863af596000312 Mon Sep 17 00:00:00 2001
From: Alexander Peskov <apeskov at nvidia.com>
Date: Tue, 6 May 2025 17:21:34 +0400
Subject: [PATCH 2/8] comments + refactoring

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 27 ++++----
 llvm/test/CodeGen/NVPTX/shift-opt.ll          | 69 +++++++++++++++++--
 2 files changed, 75 insertions(+), 21 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 9ddac013be280..4b843c754a976 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10981,8 +10981,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
 
   // fold (srl (or x, (shl (zext y), c1)), c1) -> (or (srl x, c1), (zext y))
   // c1 <= leadingzeros(zext(y))
-  if (N1C && (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND ||
-              N0.getOpcode() == ISD::XOR)) {
+  if (N1C && ISD::isBitwiseLogicOp(N0.getOpcode())) {
     SDValue lhs = N0.getOperand(0);
     SDValue rhs = N0.getOperand(1);
     SDValue shl;
@@ -10994,19 +10993,17 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
       shl = rhs;
       other = lhs;
     }
-    if (shl.getNode()) {
-      if (shl.getOperand(1).getNode() == N1C) {
-        SDValue zext = shl.getOperand(0);
-        if (zext.getOpcode() == ISD::ZERO_EXTEND) {
-          unsigned numLeadingZeros =
-              zext.getValueType().getSizeInBits() -
-              zext.getOperand(0).getValueType().getSizeInBits();
-          if (N1C->getZExtValue() <= numLeadingZeros) {
-            return DAG.getNode(
-                N0.getOpcode(), SDLoc(N0), VT,
-                DAG.getNode(ISD::SRL, SDLoc(N0), VT, other, SDValue(N1C, 0)),
-                zext);
-          }
+    if (shl && shl.getOperand(1) == N1) {
+      SDValue zext = shl.getOperand(0);
+      if (zext.getOpcode() == ISD::ZERO_EXTEND) {
+        unsigned numLeadingZeros =
+            zext.getValueType().getScalarSizeInBits() -
+            zext.getOperand(0).getValueType().getScalarSizeInBits();
+        if (N1C->getZExtValue() <= numLeadingZeros) {
+          return DAG.getNode(
+              N0.getOpcode(), SDLoc(N0), VT,
+              DAG.getNode(ISD::SRL, SDLoc(N0), VT, other, SDValue(N1C, 0)),
+              zext);
         }
       }
     }
diff --git a/llvm/test/CodeGen/NVPTX/shift-opt.ll b/llvm/test/CodeGen/NVPTX/shift-opt.ll
index 6686e8d840c6b..26850a30d2321 100644
--- a/llvm/test/CodeGen/NVPTX/shift-opt.ll
+++ b/llvm/test/CodeGen/NVPTX/shift-opt.ll
@@ -1,16 +1,16 @@
 ; RUN: llc < %s -mtriple=nvptx64 | FileCheck %s
 
-define i64 @test1(i64 %x, i32 %y) {
+define i64 @test_or(i64 %x, i32 %y) {
 ;
 ; srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
 ; c1 <= leadingzeros(zext(y))
 ;
-; CHECK-LABEL: test1
-; CHECK: ld.param.u64 %[[X:rd[0-9]+]], [test1_param_0];
-; CHECK: ld.param.u32 %[[Y:rd[0-9]+]], [test1_param_1];
+; CHECK-LABEL: test_or
+; CHECK: ld.param.u64 %[[X:rd[0-9]+]], [test_or_param_0];
+; CHECK: ld.param.u32 %[[Y:rd[0-9]+]], [test_or_param_1];
 ; CHECK: shr.u64      %[[SHR:rd[0-9]+]], %[[X]], 5;
-; CHECK: or.b64       %[[OR:rd[0-9]+]], %[[SHR]], %[[Y]];
-; CHECK: st.param.b64 [func_retval0], %[[OR]];
+; CHECK: or.b64       %[[LOP:rd[0-9]+]], %[[SHR]], %[[Y]];
+; CHECK: st.param.b64 [func_retval0], %[[LOP]];
 ;
   %ext = zext i32 %y to i64
   %shl = shl i64 %ext, 5
@@ -19,6 +19,63 @@ define i64 @test1(i64 %x, i32 %y) {
   ret i64 %srl
 }
 
+define i64 @test_xor(i64 %x, i32 %y) {
+;
+; srl (xor (x, shl(zext(y),c1)),c1) -> xor(srl(x,c1), zext(y))
+; c1 <= leadingzeros(zext(y))
+;
+; CHECK-LABEL: test_xor
+; CHECK: ld.param.u64 %[[X:rd[0-9]+]], [test_xor_param_0];
+; CHECK: ld.param.u32 %[[Y:rd[0-9]+]], [test_xor_param_1];
+; CHECK: shr.u64      %[[SHR:rd[0-9]+]], %[[X]], 5;
+; CHECK: xor.b64      %[[LOP:rd[0-9]+]], %[[SHR]], %[[Y]];
+; CHECK: st.param.b64 [func_retval0], %[[LOP]];
+;
+  %ext = zext i32 %y to i64
+  %shl = shl i64 %ext, 5
+  %or = xor i64 %x, %shl
+  %srl = lshr i64 %or, 5
+  ret i64 %srl
+}
+
+define i64 @test_and(i64 %x, i32 %y) {
+;
+; srl (and (x, shl(zext(y),c1)),c1) -> and(srl(x,c1), zext(y))
+; c1 <= leadingzeros(zext(y))
+;
+; CHECK-LABEL: test_and
+; CHECK: ld.param.u64 %[[X:rd[0-9]+]], [test_and_param_0];
+; CHECK: ld.param.u32 %[[Y:rd[0-9]+]], [test_and_param_1];
+; CHECK: shr.u64      %[[SHR:rd[0-9]+]], %[[X]], 5;
+; CHECK: and.b64      %[[LOP:rd[0-9]+]], %[[SHR]], %[[Y]];
+; CHECK: st.param.b64 [func_retval0], %[[LOP]];
+;
+  %ext = zext i32 %y to i64
+  %shl = shl i64 %ext, 5
+  %or = and i64 %x, %shl
+  %srl = lshr i64 %or, 5
+  ret i64 %srl
+}
+
+define <2 x i64> @test_or_vec(<2 x i64> %x, <2 x i32> %y) {
+;
+; srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
+; c1 <= leadingzeros(zext(y))
+;
+; CHECK-LABEL: test_or
+; CHECK: ld.param.u64 %[[X:rd[0-9]+]], [test_or_param_0];
+; CHECK: ld.param.u32 %[[Y:rd[0-9]+]], [test_or_param_1];
+; CHECK: shr.u64      %[[SHR:rd[0-9]+]], %[[X]], 5;
+; CHECK: or.b64       %[[LOP:rd[0-9]+]], %[[SHR]], %[[Y]];
+; CHECK: st.param.b64 [func_retval0], %[[LOP]];
+;
+  %ext = zext <2 x i32> %y to <2 x i64>
+  %shl = shl <2 x i64> %ext, splat(i64 5)
+  %or = or <2 x i64> %x, %shl
+  %srl = lshr <2 x i64> %or, splat(i64 5)
+  ret <2 x i64> %srl
+}
+
 define i64 @test2(i64 %x, i32 %y) {
 ;
 ; srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))

>From bbbc78b85fa6699c3d27a310efa4152d0adfd3cc Mon Sep 17 00:00:00 2001
From: Alexander Peskov <apeskov at nvidia.com>
Date: Tue, 6 May 2025 19:26:19 +0400
Subject: [PATCH 3/8] support of vectors

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  5 +--
 llvm/test/CodeGen/NVPTX/shift-opt.ll          | 36 ++++++++++---------
 2 files changed, 23 insertions(+), 18 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 4b843c754a976..93cc936d8793d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10979,7 +10979,8 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
       return DAG.getNode(ISD::SRL, DL, VT, N0, NewOp1);
   }
 
-  // fold (srl (or x, (shl (zext y), c1)), c1) -> (or (srl x, c1), (zext y))
+  // fold (srl (logic_op x, (shl (zext y), c1)), c1)
+  //   -> (logic_op (srl x, c1), (zext y))
   // c1 <= leadingzeros(zext(y))
   if (N1C && ISD::isBitwiseLogicOp(N0.getOpcode())) {
     SDValue lhs = N0.getOperand(0);
@@ -11002,7 +11003,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
         if (N1C->getZExtValue() <= numLeadingZeros) {
           return DAG.getNode(
               N0.getOpcode(), SDLoc(N0), VT,
-              DAG.getNode(ISD::SRL, SDLoc(N0), VT, other, SDValue(N1C, 0)),
+              DAG.getNode(ISD::SRL, SDLoc(N0), VT, other, N1),
               zext);
         }
       }
diff --git a/llvm/test/CodeGen/NVPTX/shift-opt.ll b/llvm/test/CodeGen/NVPTX/shift-opt.ll
index 26850a30d2321..ef19a0d03bb9e 100644
--- a/llvm/test/CodeGen/NVPTX/shift-opt.ll
+++ b/llvm/test/CodeGen/NVPTX/shift-opt.ll
@@ -57,33 +57,37 @@ define i64 @test_and(i64 %x, i32 %y) {
   ret i64 %srl
 }
 
-define <2 x i64> @test_or_vec(<2 x i64> %x, <2 x i32> %y) {
+define <2 x i16> @test_vec(<2 x i16> %x, <2 x i8> %y) {
 ;
 ; srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
 ; c1 <= leadingzeros(zext(y))
 ;
-; CHECK-LABEL: test_or
-; CHECK: ld.param.u64 %[[X:rd[0-9]+]], [test_or_param_0];
-; CHECK: ld.param.u32 %[[Y:rd[0-9]+]], [test_or_param_1];
-; CHECK: shr.u64      %[[SHR:rd[0-9]+]], %[[X]], 5;
-; CHECK: or.b64       %[[LOP:rd[0-9]+]], %[[SHR]], %[[Y]];
-; CHECK: st.param.b64 [func_retval0], %[[LOP]];
+; CHECK-LABEL: test_vec
+; CHECK: ld.param.u32 %[[X:r[0-9]+]], [test_vec_param_0];
+; CHECK: ld.param.u32 %[[P1:r[0-9]+]], [test_vec_param_1];
+; CHECK: and.b32      %[[Y:r[0-9]+]], %[[P1]], 16711935;
+; CHECK: mov.b32      {%[[X1:rs[0-9]+]], %[[X2:rs[0-9]+]]}, %[[X]];
+; CHECK: shr.u16      %[[SHR2:rs[0-9]+]], %[[X2]], 5;
+; CHECK: shr.u16      %[[SHR1:rs[0-9]+]], %[[X1]], 5;
+; CHECK: mov.b32      %[[SHR:r[0-9]+]], {%[[SHR1]], %[[SHR2]]};
+; CHECK: or.b32       %[[LOP:r[0-9]+]], %[[SHR]], %[[Y]];
+; CHECK: st.param.b32 [func_retval0], %[[LOP]];
 ;
-  %ext = zext <2 x i32> %y to <2 x i64>
-  %shl = shl <2 x i64> %ext, splat(i64 5)
-  %or = or <2 x i64> %x, %shl
-  %srl = lshr <2 x i64> %or, splat(i64 5)
-  ret <2 x i64> %srl
+  %ext = zext <2 x i8> %y to <2 x i16>
+  %shl = shl <2 x i16> %ext, splat(i16 5)
+  %or = or <2 x i16> %x, %shl
+  %srl = lshr <2 x i16> %or, splat(i16 5)
+  ret <2 x i16> %srl
 }
 
-define i64 @test2(i64 %x, i32 %y) {
+define i64 @test_negative_c(i64 %x, i32 %y) {
 ;
 ; srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
 ; c1 > leadingzeros(zext(y)).
 ;
-; CHECK-LABEL: test2
-; CHECK: ld.param.u64 %[[X:rd[0-9]+]], [test2_param_0];
-; CHECK: ld.param.u32 %[[Y:rd[0-9]+]], [test2_param_1];
+; CHECK-LABEL: test_negative_c
+; CHECK: ld.param.u64 %[[X:rd[0-9]+]], [test_negative_c_param_0];
+; CHECK: ld.param.u32 %[[Y:rd[0-9]+]], [test_negative_c_param_1];
 ; CHECK: shl.b64      %[[SHL:rd[0-9]+]], %[[Y]], 33;
 ; CHECK: or.b64       %[[OR:rd[0-9]+]], %[[X]], %[[SHL]];
 ; CHECK: shr.u64      %[[SHR:rd[0-9]+]], %[[OR]], 33;

>From c9f07b8e297f8045dcfc0e1a0e0f7d97a85118ab Mon Sep 17 00:00:00 2001
From: Alexander Peskov <apeskov at nvidia.com>
Date: Tue, 6 May 2025 19:55:07 +0400
Subject: [PATCH 4/8] negative case

---
 llvm/test/CodeGen/NVPTX/shift-opt.ll | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/llvm/test/CodeGen/NVPTX/shift-opt.ll b/llvm/test/CodeGen/NVPTX/shift-opt.ll
index ef19a0d03bb9e..437eda4670dc7 100644
--- a/llvm/test/CodeGen/NVPTX/shift-opt.ll
+++ b/llvm/test/CodeGen/NVPTX/shift-opt.ll
@@ -61,6 +61,7 @@ define <2 x i16> @test_vec(<2 x i16> %x, <2 x i8> %y) {
 ;
 ; srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
 ; c1 <= leadingzeros(zext(y))
+; x, y - vectors
 ;
 ; CHECK-LABEL: test_vec
 ; CHECK: ld.param.u32 %[[X:r[0-9]+]], [test_vec_param_0];
@@ -99,3 +100,28 @@ define i64 @test_negative_c(i64 %x, i32 %y) {
   %srl = lshr i64 %or, 33
   ret i64 %srl
 }
+
+declare void @use(i64)
+
+define i64 @test_negative_use_lop(i64 %x, i32 %y) {
+;
+; srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
+; c1 <= leadingzeros(zext(y))
+;  
+; 
+;
+; CHECK-LABEL: test_negative_use_lop
+; CHECK: ld.param.u64 %[[X:rd[0-9]+]], [test_negative_c_param_0];
+; CHECK: ld.param.u32 %[[Y:rd[0-9]+]], [test_negative_c_param_1];
+; CHECK: shl.b64      %[[SHL:rd[0-9]+]], %[[Y]], 33;
+; CHECK: or.b64       %[[OR:rd[0-9]+]], %[[X]], %[[SHL]];
+; CHECK: shr.u64      %[[SHR:rd[0-9]+]], %[[OR]], 33;
+; CHECK: st.param.b64 [func_retval0], %[[SHR]];
+;
+  %ext = zext i32 %y to i64
+  %shl = shl i64 %ext, 5
+  %or = or i64 %x, %shl
+  %srl = lshr i64 %or, 5
+  call void @use(i64 %or)
+  ret i64 %srl
+}
\ No newline at end of file

>From ac34287f60bfbc90a9b290de75d965be0e5bc078 Mon Sep 17 00:00:00 2001
From: Alexander Peskov <apeskov at nvidia.com>
Date: Tue, 6 May 2025 20:31:05 +0400
Subject: [PATCH 5/8] negative tests. Multiple usage

Signed-off-by: Alexander Peskov <apeskov at nvidia.com>
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  4 +-
 llvm/test/CodeGen/NVPTX/shift-opt.ll          | 42 +++++++++++++++----
 2 files changed, 37 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 93cc936d8793d..7b29a586bf639 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10982,7 +10982,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
   // fold (srl (logic_op x, (shl (zext y), c1)), c1)
   //   -> (logic_op (srl x, c1), (zext y))
   // c1 <= leadingzeros(zext(y))
-  if (N1C && ISD::isBitwiseLogicOp(N0.getOpcode())) {
+  if (N1C && ISD::isBitwiseLogicOp(N0.getOpcode()) && N0.hasOneUse()) {
     SDValue lhs = N0.getOperand(0);
     SDValue rhs = N0.getOperand(1);
     SDValue shl;
@@ -10994,7 +10994,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
       shl = rhs;
       other = lhs;
     }
-    if (shl && shl.getOperand(1) == N1) {
+    if (shl && shl.getOperand(1) == N1 && shl.hasOneUse()) {
       SDValue zext = shl.getOperand(0);
       if (zext.getOpcode() == ISD::ZERO_EXTEND) {
         unsigned numLeadingZeros =
diff --git a/llvm/test/CodeGen/NVPTX/shift-opt.ll b/llvm/test/CodeGen/NVPTX/shift-opt.ll
index 437eda4670dc7..855f8f2f91cbc 100644
--- a/llvm/test/CodeGen/NVPTX/shift-opt.ll
+++ b/llvm/test/CodeGen/NVPTX/shift-opt.ll
@@ -107,15 +107,17 @@ define i64 @test_negative_use_lop(i64 %x, i32 %y) {
 ;
 ; srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
 ; c1 <= leadingzeros(zext(y))
-;  
-; 
+; multiple usage of "or"
 ;
 ; CHECK-LABEL: test_negative_use_lop
-; CHECK: ld.param.u64 %[[X:rd[0-9]+]], [test_negative_c_param_0];
-; CHECK: ld.param.u32 %[[Y:rd[0-9]+]], [test_negative_c_param_1];
-; CHECK: shl.b64      %[[SHL:rd[0-9]+]], %[[Y]], 33;
+; CHECK: ld.param.u64 %[[X:rd[0-9]+]], [test_negative_use_lop_param_0];
+; CHECK: ld.param.u32 %[[Y:r[0-9]+]], [test_negative_use_lop_param_1];
+; CHECK: mul.wide.u32 %[[SHL:rd[0-9]+]], %[[Y]], 32;
 ; CHECK: or.b64       %[[OR:rd[0-9]+]], %[[X]], %[[SHL]];
-; CHECK: shr.u64      %[[SHR:rd[0-9]+]], %[[OR]], 33;
+; CHECK: shr.u64      %[[SHR:rd[0-9]+]], %[[OR]], 5;
+; CHECK: { // callseq
+; CHECK:      st.param.b64    [param0], %[[OR]];
+; CHECK: } // callseq
 ; CHECK: st.param.b64 [func_retval0], %[[SHR]];
 ;
   %ext = zext i32 %y to i64
@@ -124,4 +126,30 @@ define i64 @test_negative_use_lop(i64 %x, i32 %y) {
   %srl = lshr i64 %or, 5
   call void @use(i64 %or)
   ret i64 %srl
-}
\ No newline at end of file
+}
+
+
+define i64 @test_negative_use_shl(i64 %x, i32 %y) {
+;
+; srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
+; c1 <= leadingzeros(zext(y))
+; multiple usage of "shl"
+;
+; CHECK-LABEL: test_negative_use_shl
+; CHECK: ld.param.u64 %[[X:rd[0-9]+]], [test_negative_use_shl_param_0];
+; CHECK: ld.param.u32 %[[Y:r[0-9]+]], [test_negative_use_shl_param_1];
+; CHECK: mul.wide.u32 %[[SHL:rd[0-9]+]], %[[Y]], 32;
+; CHECK: or.b64       %[[OR:rd[0-9]+]], %[[X]], %[[SHL]];
+; CHECK: shr.u64      %[[SHR:rd[0-9]+]], %[[OR]], 5;
+; CHECK: { // callseq
+; CHECK:      st.param.b64    [param0], %[[SHL]];
+; CHECK: } // callseq
+; CHECK: st.param.b64 [func_retval0], %[[SHR]];
+;
+  %ext = zext i32 %y to i64
+  %shl = shl i64 %ext, 5
+  %or = or i64 %x, %shl
+  %srl = lshr i64 %or, 5
+  call void @use(i64 %shl)
+  ret i64 %srl
+}

>From 83865a5157e9356c2c9bc654bcf499c518660039 Mon Sep 17 00:00:00 2001
From: Alexander Peskov <apeskov at nvidia.com>
Date: Tue, 6 May 2025 22:00:37 +0400
Subject: [PATCH 6/8] Capitalize variables

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 40 +++++++++----------
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 7b29a586bf639..f4aa1d8af6b15 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10983,28 +10983,28 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
   //   -> (logic_op (srl x, c1), (zext y))
   // c1 <= leadingzeros(zext(y))
   if (N1C && ISD::isBitwiseLogicOp(N0.getOpcode()) && N0.hasOneUse()) {
-    SDValue lhs = N0.getOperand(0);
-    SDValue rhs = N0.getOperand(1);
-    SDValue shl;
-    SDValue other;
-    if (lhs.getOpcode() == ISD::SHL) {
-      shl = lhs;
-      other = rhs;
-    } else if (rhs.getOpcode() == ISD::SHL) {
-      shl = rhs;
-      other = lhs;
-    }
-    if (shl && shl.getOperand(1) == N1 && shl.hasOneUse()) {
-      SDValue zext = shl.getOperand(0);
-      if (zext.getOpcode() == ISD::ZERO_EXTEND) {
-        unsigned numLeadingZeros =
-            zext.getValueType().getScalarSizeInBits() -
-            zext.getOperand(0).getValueType().getScalarSizeInBits();
-        if (N1C->getZExtValue() <= numLeadingZeros) {
+    SDValue LHS = N0.getOperand(0);
+    SDValue RHS = N0.getOperand(1);
+    SDValue SHL;
+    SDValue Other;
+    if (LHS.getOpcode() == ISD::SHL) {
+      SHL = LHS;
+      Other = RHS;
+    } else if (RHS.getOpcode() == ISD::SHL) {
+      SHL = RHS;
+      Other = LHS;
+    }
+    if (SHL && SHL.getOperand(1) == N1 && SHL.hasOneUse()) {
+      SDValue ZExt = SHL.getOperand(0);
+      if (ZExt.getOpcode() == ISD::ZERO_EXTEND) {
+        unsigned NumLeadingZeros =
+            ZExt.getValueType().getScalarSizeInBits() -
+            ZExt.getOperand(0).getValueType().getScalarSizeInBits();
+        if (N1C->getZExtValue() <= NumLeadingZeros) {
           return DAG.getNode(
               N0.getOpcode(), SDLoc(N0), VT,
-              DAG.getNode(ISD::SRL, SDLoc(N0), VT, other, N1),
-              zext);
+              DAG.getNode(ISD::SRL, SDLoc(N0), VT, Other, N1),
+              ZExt);
         }
       }
     }

>From 6665d07a34ec2eb9e68fbeb00eed7fe2880ff743 Mon Sep 17 00:00:00 2001
From: Alexander Peskov <apeskov at nvidia.com>
Date: Tue, 6 May 2025 22:16:10 +0400
Subject: [PATCH 7/8] minor

---
 llvm/test/CodeGen/NVPTX/shift-opt.ll | 27 ++++++++++++---------------
 1 file changed, 12 insertions(+), 15 deletions(-)

diff --git a/llvm/test/CodeGen/NVPTX/shift-opt.ll b/llvm/test/CodeGen/NVPTX/shift-opt.ll
index 855f8f2f91cbc..010ea7ad5c844 100644
--- a/llvm/test/CodeGen/NVPTX/shift-opt.ll
+++ b/llvm/test/CodeGen/NVPTX/shift-opt.ll
@@ -2,7 +2,7 @@
 
 define i64 @test_or(i64 %x, i32 %y) {
 ;
-; srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
+; Fold: srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
 ; c1 <= leadingzeros(zext(y))
 ;
 ; CHECK-LABEL: test_or
@@ -21,7 +21,7 @@ define i64 @test_or(i64 %x, i32 %y) {
 
 define i64 @test_xor(i64 %x, i32 %y) {
 ;
-; srl (xor (x, shl(zext(y),c1)),c1) -> xor(srl(x,c1), zext(y))
+; Fold: srl (xor (x, shl(zext(y),c1)),c1) -> xor(srl(x,c1), zext(y))
 ; c1 <= leadingzeros(zext(y))
 ;
 ; CHECK-LABEL: test_xor
@@ -40,7 +40,7 @@ define i64 @test_xor(i64 %x, i32 %y) {
 
 define i64 @test_and(i64 %x, i32 %y) {
 ;
-; srl (and (x, shl(zext(y),c1)),c1) -> and(srl(x,c1), zext(y))
+; Fold: srl (and (x, shl(zext(y),c1)),c1) -> and(srl(x,c1), zext(y))
 ; c1 <= leadingzeros(zext(y))
 ;
 ; CHECK-LABEL: test_and
@@ -59,7 +59,7 @@ define i64 @test_and(i64 %x, i32 %y) {
 
 define <2 x i16> @test_vec(<2 x i16> %x, <2 x i8> %y) {
 ;
-; srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
+; Fold: srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
 ; c1 <= leadingzeros(zext(y))
 ; x, y - vectors
 ;
@@ -83,8 +83,8 @@ define <2 x i16> @test_vec(<2 x i16> %x, <2 x i8> %y) {
 
 define i64 @test_negative_c(i64 %x, i32 %y) {
 ;
-; srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
-; c1 > leadingzeros(zext(y)).
+; Do not fold: srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
+; Reason: c1 > leadingzeros(zext(y)).
 ;
 ; CHECK-LABEL: test_negative_c
 ; CHECK: ld.param.u64 %[[X:rd[0-9]+]], [test_negative_c_param_0];
@@ -105,9 +105,8 @@ declare void @use(i64)
 
 define i64 @test_negative_use_lop(i64 %x, i32 %y) {
 ;
-; srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
-; c1 <= leadingzeros(zext(y))
-; multiple usage of "or"
+; Do not fold: srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
+; Reason: multiple usage of "or"
 ;
 ; CHECK-LABEL: test_negative_use_lop
 ; CHECK: ld.param.u64 %[[X:rd[0-9]+]], [test_negative_use_lop_param_0];
@@ -116,7 +115,7 @@ define i64 @test_negative_use_lop(i64 %x, i32 %y) {
 ; CHECK: or.b64       %[[OR:rd[0-9]+]], %[[X]], %[[SHL]];
 ; CHECK: shr.u64      %[[SHR:rd[0-9]+]], %[[OR]], 5;
 ; CHECK: { // callseq
-; CHECK:      st.param.b64    [param0], %[[OR]];
+; CHECK:   st.param.b64    [param0], %[[OR]];
 ; CHECK: } // callseq
 ; CHECK: st.param.b64 [func_retval0], %[[SHR]];
 ;
@@ -128,12 +127,10 @@ define i64 @test_negative_use_lop(i64 %x, i32 %y) {
   ret i64 %srl
 }
 
-
 define i64 @test_negative_use_shl(i64 %x, i32 %y) {
 ;
-; srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
-; c1 <= leadingzeros(zext(y))
-; multiple usage of "shl"
+; Do not fold: srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
+; Reason: multiple usage of "shl"
 ;
 ; CHECK-LABEL: test_negative_use_shl
 ; CHECK: ld.param.u64 %[[X:rd[0-9]+]], [test_negative_use_shl_param_0];
@@ -142,7 +139,7 @@ define i64 @test_negative_use_shl(i64 %x, i32 %y) {
 ; CHECK: or.b64       %[[OR:rd[0-9]+]], %[[X]], %[[SHL]];
 ; CHECK: shr.u64      %[[SHR:rd[0-9]+]], %[[OR]], 5;
 ; CHECK: { // callseq
-; CHECK:      st.param.b64    [param0], %[[SHL]];
+; CHECK:   st.param.b64    [param0], %[[SHL]];
 ; CHECK: } // callseq
 ; CHECK: st.param.b64 [func_retval0], %[[SHR]];
 ;

>From 4b2cdbb7703a35178ef3a1b14dd12fb319a8b9ad Mon Sep 17 00:00:00 2001
From: Alexander Peskov <apeskov at nvidia.com>
Date: Tue, 6 May 2025 23:17:07 +0400
Subject: [PATCH 8/8] fix lint

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index f4aa1d8af6b15..7981a6a9b81db 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -11001,10 +11001,9 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
             ZExt.getValueType().getScalarSizeInBits() -
             ZExt.getOperand(0).getValueType().getScalarSizeInBits();
         if (N1C->getZExtValue() <= NumLeadingZeros) {
-          return DAG.getNode(
-              N0.getOpcode(), SDLoc(N0), VT,
-              DAG.getNode(ISD::SRL, SDLoc(N0), VT, Other, N1),
-              ZExt);
+          return DAG.getNode(N0.getOpcode(), SDLoc(N0), VT,
+                             DAG.getNode(ISD::SRL, SDLoc(N0), VT, Other, N1),
+                             ZExt);
         }
       }
     }



More information about the llvm-commits mailing list