[llvm] [DAGCombiner][RISCV] Optimize (zext nneg (truncate X)) if X has known sign bits. (PR #82227)

Mon Feb 19 00:11:18 PST 2024

https://github.com/topperc created https://github.com/llvm/llvm-project/pull/82227

This treats the zext nneg as a sext if X is known to have sufficient sign bits to allow the zext, the truncate, or both to be removed. This code is taken from the same optimization for sext.

Test cases are based on a common pattern where a zext nneg is created based on a dominating condition that ensures the value is non-negative. The value will be exported from the first block sign-extended to a legal type. This creates an AssertSExt and truncate in the next block. Treating the zext nneg as a sext allows us to remove the zext.

The zext_nneg_crossbb_i32 test will be further improved when this patch combines with #82199.

>From 132158c8a2dd13bd4b9c9a23297a134d4615b627 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Sun, 18 Feb 2024 23:58:46 -0800
Subject: [PATCH] [DAGCombiner][RISCV] Optimize (zext nneg (truncate X)) if X
 has known sign bits.

This treats the zext nneg as a sext if X is known to have sufficient
sign bits to allow the zext, the truncate, or both to be removed. This
code is taken from the same optimization for sext.

Test cases are based on a common pattern where a zext nneg is created
based on a dominating condition that ensures the value is non-negative.
The value will be exported from the first block sign-extended to
a legal type. This creates an AssertSExt and truncate in the next
block. Treating the zext nneg as a sext allows us to remove the zext.

The zext_nneg_crossbb_i32 test will be further improved when this
patch combines with #82199.
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 26 +++++++
 llvm/test/CodeGen/RISCV/sext-zext-trunc.ll    | 72 +++++++++++++++++++
 llvm/test/CodeGen/VE/Scalar/ctlz.ll           |  2 -
 3 files changed, 98 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 2a09e44e192979..b259d3e20cfc1d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -13758,6 +13758,32 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
     EVT SrcVT = N0.getOperand(0).getValueType();
     EVT MinVT = N0.getValueType();
 
+    if (N->getFlags().hasNonNeg()) {
+      SDValue Op = N0.getOperand(0);
+      unsigned OpBits = SrcVT.getScalarSizeInBits();
+      unsigned MidBits = MinVT.getScalarSizeInBits();
+      unsigned DestBits = VT.getScalarSizeInBits();
+      unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
+
+      if (OpBits == DestBits) {
+        // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
+        // bits, it is already ready.
+        if (NumSignBits > DestBits-MidBits)
+          return Op;
+      } else if (OpBits < DestBits) {
+        // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
+        // bits, just sext from i32.
+        // FIXME: This can probably be ZERO_EXTEND nneg?
+        if (NumSignBits > OpBits-MidBits)
+          return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
+      } else {
+        // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
+        // bits, just truncate to i32.
+        if (NumSignBits > OpBits-MidBits)
+          return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
+      }
+    }
+
     // Try to mask before the extension to avoid having to generate a larger mask,
     // possibly over several sub-vectors.
     if (SrcVT.bitsLT(VT) && VT.isVector()) {
diff --git a/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll b/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll
index 20d73acddea01b..4185fbbf6e1155 100644
--- a/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll
+++ b/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll
@@ -750,3 +750,75 @@ define i64 @dec_of_zexted_cmp_i64(i64 %x) {
   %dec = sub i64 %zext, 1
   ret i64 %dec
 }
+
+define void @zext_nneg_crossbb_i64(i16 signext %0) {
+; RV32I-LABEL: zext_nneg_crossbb_i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    bltz a0, .LBB46_2
+; RV32I-NEXT:  # %bb.1:
+; RV32I-NEXT:    srai a1, a0, 31
+; RV32I-NEXT:    tail bar_i64
+; RV32I-NEXT:  .LBB46_2:
+; RV32I-NEXT:    ret
+;
+; RV64-LABEL: zext_nneg_crossbb_i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    bltz a0, .LBB46_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    tail bar_i64
+; RV64-NEXT:  .LBB46_2:
+; RV64-NEXT:    ret
+  %2 = icmp sgt i16 %0, -1
+  br i1 %2, label %3, label %5
+
+3:
+  %4 = zext nneg i16 %0 to i64
+  tail call void @bar_i64(i64 %4)
+  br label %5
+
+5:
+  ret void
+}
+
+declare void @bar_i64(i64)
+
+define void @zext_nneg_crossbb_i32(i16 signext %0) {
+; RV32I-LABEL: zext_nneg_crossbb_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    bltz a0, .LBB47_2
+; RV32I-NEXT:  # %bb.1:
+; RV32I-NEXT:    tail bar_i32
+; RV32I-NEXT:  .LBB47_2:
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: zext_nneg_crossbb_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    bltz a0, .LBB47_2
+; RV64I-NEXT:  # %bb.1:
+; RV64I-NEXT:    slli a0, a0, 48
+; RV64I-NEXT:    srli a0, a0, 48
+; RV64I-NEXT:    tail bar_i32
+; RV64I-NEXT:  .LBB47_2:
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: zext_nneg_crossbb_i32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    bltz a0, .LBB47_2
+; RV64ZBB-NEXT:  # %bb.1:
+; RV64ZBB-NEXT:    zext.h a0, a0
+; RV64ZBB-NEXT:    tail bar_i32
+; RV64ZBB-NEXT:  .LBB47_2:
+; RV64ZBB-NEXT:    ret
+  %2 = icmp sgt i16 %0, -1
+  br i1 %2, label %3, label %5
+
+3:
+  %4 = zext nneg i16 %0 to i32
+  tail call void @bar_i32(i32 %4)
+  br label %5
+
+5:
+  ret void
+}
+
+declare void @bar_i32(i32)
diff --git a/llvm/test/CodeGen/VE/Scalar/ctlz.ll b/llvm/test/CodeGen/VE/Scalar/ctlz.ll
index 57d1a352c1a76a..c8c2b11c5eef61 100644
--- a/llvm/test/CodeGen/VE/Scalar/ctlz.ll
+++ b/llvm/test/CodeGen/VE/Scalar/ctlz.ll
@@ -34,7 +34,6 @@ define signext i32 @func32s(i32 signext %p) {
 ; CHECK-NEXT:    and %s0, %s0, (32)0
 ; CHECK-NEXT:    ldz %s0, %s0
 ; CHECK-NEXT:    lea %s0, -32(, %s0)
-; CHECK-NEXT:    and %s0, %s0, (32)0
 ; CHECK-NEXT:    b.l.t (, %s10)
   %r = tail call i32 @llvm.ctlz.i32(i32 %p, i1 true)
   ret i32 %r
@@ -202,7 +201,6 @@ define signext i32 @func32sx(i32 signext %p) {
 ; CHECK-NEXT:    and %s0, %s0, (32)0
 ; CHECK-NEXT:    ldz %s0, %s0
 ; CHECK-NEXT:    lea %s0, -32(, %s0)
-; CHECK-NEXT:    and %s0, %s0, (32)0
 ; CHECK-NEXT:    b.l.t (, %s10)
   %r = tail call i32 @llvm.ctlz.i32(i32 %p, i1 false)
   ret i32 %r