[llvm] [DAGCombiner][RISCV] Optimize (zext nneg (truncate X)) if X has known sign bits. (PR #82227)

Mon Feb 19 10:34:39 PST 2024

https://github.com/topperc updated https://github.com/llvm/llvm-project/pull/82227

>From 987c0f89c8df36404e61ced91c71df0af088326d Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Sun, 18 Feb 2024 23:58:46 -0800
Subject: [PATCH] [DAGCombiner][RISCV] Optimize (zext nneg (truncate X)) if X
 has known sign bits.

This treats the zext nneg as sext if X is known to have sufficient
sign bits to allow the zext or truncate or both to removed. This
code is taken from the same optimization for sext.

Test cases are based on a common pattern where a zext nneg is created
based on a dominating condition that ensure the value is positive.
The value will be exported from the first block sign extended to
a legal type. This creates an AssertSExt and truncate in the next
block. Treating the zext nneg as a sext allows us to remove the zext.

The zext_nneg_crossbb_i32 test will be further improved when this
patch combines with #82199.
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 26 ++++++++++++++++
 llvm/test/CodeGen/RISCV/sext-zext-trunc.ll    | 31 +++++--------------
 llvm/test/CodeGen/VE/Scalar/ctlz.ll           |  2 --
 3 files changed, 34 insertions(+), 25 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 2a09e44e192979..030438a00955ee 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -13758,6 +13758,32 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
     EVT SrcVT = N0.getOperand(0).getValueType();
     EVT MinVT = N0.getValueType();
 
+    if (N->getFlags().hasNonNeg()) {
+      SDValue Op = N0.getOperand(0);
+      unsigned OpBits = SrcVT.getScalarSizeInBits();
+      unsigned MidBits = MinVT.getScalarSizeInBits();
+      unsigned DestBits = VT.getScalarSizeInBits();
+      unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
+
+      if (OpBits == DestBits) {
+        // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
+        // bits, it is already ready.
+        if (NumSignBits > DestBits - MidBits)
+          return Op;
+      } else if (OpBits < DestBits) {
+        // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
+        // bits, just sext from i32.
+        // FIXME: This can probably be ZERO_EXTEND nneg?
+        if (NumSignBits > OpBits - MidBits)
+          return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
+      } else {
+        // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
+        // bits, just truncate to i32.
+        if (NumSignBits > OpBits - MidBits)
+          return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
+      }
+    }
+
     // Try to mask before the extension to avoid having to generate a larger mask,
     // possibly over several sub-vectors.
     if (SrcVT.bitsLT(VT) && VT.isVector()) {
diff --git a/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll b/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll
index 5ed835c60acf0c..af136806f71845 100644
--- a/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll
+++ b/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll
@@ -756,31 +756,18 @@ define void @zext_nneg_dominating_icmp_i64(i16 signext %0) {
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    bltz a0, .LBB46_2
 ; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    slli a0, a0, 16
-; RV32I-NEXT:    srli a0, a0, 16
-; RV32I-NEXT:    li a1, 0
+; RV32I-NEXT:    srai a1, a0, 31
 ; RV32I-NEXT:    tail bar_i64
 ; RV32I-NEXT:  .LBB46_2:
 ; RV32I-NEXT:    ret
 ;
-; RV64I-LABEL: zext_nneg_dominating_icmp_i64:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    bltz a0, .LBB46_2
-; RV64I-NEXT:  # %bb.1:
-; RV64I-NEXT:    slli a0, a0, 48
-; RV64I-NEXT:    srli a0, a0, 48
-; RV64I-NEXT:    tail bar_i64
-; RV64I-NEXT:  .LBB46_2:
-; RV64I-NEXT:    ret
-;
-; RV64ZBB-LABEL: zext_nneg_dominating_icmp_i64:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    bltz a0, .LBB46_2
-; RV64ZBB-NEXT:  # %bb.1:
-; RV64ZBB-NEXT:    zext.h a0, a0
-; RV64ZBB-NEXT:    tail bar_i64
-; RV64ZBB-NEXT:  .LBB46_2:
-; RV64ZBB-NEXT:    ret
+; RV64-LABEL: zext_nneg_dominating_icmp_i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    bltz a0, .LBB46_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    tail bar_i64
+; RV64-NEXT:  .LBB46_2:
+; RV64-NEXT:    ret
   %2 = icmp sgt i16 %0, -1
   br i1 %2, label %3, label %5
 
@@ -800,8 +787,6 @@ define void @zext_nneg_dominating_icmp_i32(i16 signext %0) {
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    bltz a0, .LBB47_2
 ; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    slli a0, a0, 16
-; RV32I-NEXT:    srli a0, a0, 16
 ; RV32I-NEXT:    tail bar_i32
 ; RV32I-NEXT:  .LBB47_2:
 ; RV32I-NEXT:    ret
diff --git a/llvm/test/CodeGen/VE/Scalar/ctlz.ll b/llvm/test/CodeGen/VE/Scalar/ctlz.ll
index 57d1a352c1a76a..c8c2b11c5eef61 100644
--- a/llvm/test/CodeGen/VE/Scalar/ctlz.ll
+++ b/llvm/test/CodeGen/VE/Scalar/ctlz.ll
@@ -34,7 +34,6 @@ define signext i32 @func32s(i32 signext %p) {
 ; CHECK-NEXT:    and %s0, %s0, (32)0
 ; CHECK-NEXT:    ldz %s0, %s0
 ; CHECK-NEXT:    lea %s0, -32(, %s0)
-; CHECK-NEXT:    and %s0, %s0, (32)0
 ; CHECK-NEXT:    b.l.t (, %s10)
   %r = tail call i32 @llvm.ctlz.i32(i32 %p, i1 true)
   ret i32 %r
@@ -202,7 +201,6 @@ define signext i32 @func32sx(i32 signext %p) {
 ; CHECK-NEXT:    and %s0, %s0, (32)0
 ; CHECK-NEXT:    ldz %s0, %s0
 ; CHECK-NEXT:    lea %s0, -32(, %s0)
-; CHECK-NEXT:    and %s0, %s0, (32)0
 ; CHECK-NEXT:    b.l.t (, %s10)
   %r = tail call i32 @llvm.ctlz.i32(i32 %p, i1 false)
   ret i32 %r