[llvm] [DAGCombiner][RISCV] CSE zext nneg and sext. (PR #82597)

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 22 00:12:50 PST 2024


https://github.com/topperc created https://github.com/llvm/llvm-project/pull/82597

If we have a sext and a zext nneg with the same types and operand
we should combine them into the sext. We can't go the other way
because the nneg flag may only be valid in the context of the uses
of the zext nneg.

The test case's load, compare, branch, and zext are based on real patterns I've seen.

From 2fbd63031f10a1dd8cec550008ddcb45af7cebd7 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Wed, 21 Feb 2024 23:48:19 -0800
Subject: [PATCH 1/2] [RISCV] Add test case showing missed opportunity to form
 sextload when sext and zext nneg are both present.

---
 llvm/test/CodeGen/RISCV/sext-zext-trunc.ll | 84 ++++++++++++++++++++++
 1 file changed, 84 insertions(+)

diff --git a/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll b/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll
index a2a953ca882bad..078fc7f358b46c 100644
--- a/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll
+++ b/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll
@@ -871,3 +871,87 @@ define void @zext_nneg_dominating_icmp_i32_zeroext(i16 signext %0) {
 5:
   ret void
 }
+
+; The load is used extended and non-extended in the successor basic block. The
+; signed compare will cause the non-extended value to be exported out of the first
+; basic block using a sext. We need to CSE the zext nneg with the sext so that
+; we can form a sextload.
+define void @load_zext_nneg_sext_cse(ptr %p) nounwind {
+; RV32I-LABEL: load_zext_nneg_sext_cse:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lhu s0, 0(a0)
+; RV32I-NEXT:    slli a0, s0, 16
+; RV32I-NEXT:    bltz a0, .LBB50_2
+; RV32I-NEXT:  # %bb.1: # %bb1
+; RV32I-NEXT:    srai a0, a0, 16
+; RV32I-NEXT:    call bar_i16
+; RV32I-NEXT:    mv a0, s0
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    tail bar_i32
+; RV32I-NEXT:  .LBB50_2: # %bb2
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: load_zext_nneg_sext_cse:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lhu s0, 0(a0)
+; RV64I-NEXT:    slli a0, s0, 48
+; RV64I-NEXT:    bltz a0, .LBB50_2
+; RV64I-NEXT:  # %bb.1: # %bb1
+; RV64I-NEXT:    srai a0, a0, 48
+; RV64I-NEXT:    call bar_i16
+; RV64I-NEXT:    mv a0, s0
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    tail bar_i32
+; RV64I-NEXT:  .LBB50_2: # %bb2
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: load_zext_nneg_sext_cse:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    addi sp, sp, -16
+; RV64ZBB-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64ZBB-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
+; RV64ZBB-NEXT:    lhu s0, 0(a0)
+; RV64ZBB-NEXT:    sext.h a0, s0
+; RV64ZBB-NEXT:    bltz a0, .LBB50_2
+; RV64ZBB-NEXT:  # %bb.1: # %bb1
+; RV64ZBB-NEXT:    call bar_i16
+; RV64ZBB-NEXT:    mv a0, s0
+; RV64ZBB-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64ZBB-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
+; RV64ZBB-NEXT:    addi sp, sp, 16
+; RV64ZBB-NEXT:    tail bar_i32
+; RV64ZBB-NEXT:  .LBB50_2: # %bb2
+; RV64ZBB-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64ZBB-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
+; RV64ZBB-NEXT:    addi sp, sp, 16
+; RV64ZBB-NEXT:    ret
+  %load = load i16, ptr %p
+  %zext = zext nneg i16 %load to i32
+  %cmp = icmp sgt i16 %load, -1
+  br i1 %cmp, label %bb1, label %bb2
+
+bb1:
+  tail call void @bar_i16(i16 signext %load)
+  tail call void @bar_i32(i32 signext %zext)
+  br label %bb2
+
+bb2:
+  ret void
+}
+declare void @bar_i16(i16);

From 5c46d9492ebf71de697d319b2192493fa4153155 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Thu, 22 Feb 2024 00:00:15 -0800
Subject: [PATCH 2/2] [DAGCombiner][RISCV] CSE zext nneg and sext.

If we have a sext and a zext nneg with the same types and operand
we should combine them into the sext. We can't go the other way
because the nneg flag may only be valid in the context of the uses
of the zext nneg.
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  7 ++
 llvm/test/CodeGen/RISCV/sext-zext-trunc.ll    | 69 +++++++------------
 2 files changed, 30 insertions(+), 46 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 89ef648ee7d7ed..ed43dd7f528821 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -13997,6 +13997,13 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
   if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, Level))
     return Res;
 
+  // CSE zext nneg with sext if the zext is not free.
+  if (N->getFlags().hasNonNeg() && !TLI.isZExtFree(N0.getValueType(), VT)) {
+    SDNode *CSENode = DAG.getNodeIfExists(ISD::SIGN_EXTEND, N->getVTList(), N0);
+    if (CSENode)
+      return SDValue(CSENode, 0);
+  }
+
   return SDValue();
 }
 
diff --git a/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll b/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll
index 078fc7f358b46c..255a1257ef43ad 100644
--- a/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll
+++ b/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll
@@ -882,11 +882,10 @@ define void @load_zext_nneg_sext_cse(ptr %p) nounwind {
 ; RV32I-NEXT:    addi sp, sp, -16
 ; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lhu s0, 0(a0)
-; RV32I-NEXT:    slli a0, s0, 16
-; RV32I-NEXT:    bltz a0, .LBB50_2
+; RV32I-NEXT:    lh s0, 0(a0)
+; RV32I-NEXT:    bltz s0, .LBB50_2
 ; RV32I-NEXT:  # %bb.1: # %bb1
-; RV32I-NEXT:    srai a0, a0, 16
+; RV32I-NEXT:    mv a0, s0
 ; RV32I-NEXT:    call bar_i16
 ; RV32I-NEXT:    mv a0, s0
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
@@ -899,48 +898,26 @@ define void @load_zext_nneg_sext_cse(ptr %p) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV64I-LABEL: load_zext_nneg_sext_cse:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    addi sp, sp, -16
-; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lhu s0, 0(a0)
-; RV64I-NEXT:    slli a0, s0, 48
-; RV64I-NEXT:    bltz a0, .LBB50_2
-; RV64I-NEXT:  # %bb.1: # %bb1
-; RV64I-NEXT:    srai a0, a0, 48
-; RV64I-NEXT:    call bar_i16
-; RV64I-NEXT:    mv a0, s0
-; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    addi sp, sp, 16
-; RV64I-NEXT:    tail bar_i32
-; RV64I-NEXT:  .LBB50_2: # %bb2
-; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    addi sp, sp, 16
-; RV64I-NEXT:    ret
-;
-; RV64ZBB-LABEL: load_zext_nneg_sext_cse:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    addi sp, sp, -16
-; RV64ZBB-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
-; RV64ZBB-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
-; RV64ZBB-NEXT:    lhu s0, 0(a0)
-; RV64ZBB-NEXT:    sext.h a0, s0
-; RV64ZBB-NEXT:    bltz a0, .LBB50_2
-; RV64ZBB-NEXT:  # %bb.1: # %bb1
-; RV64ZBB-NEXT:    call bar_i16
-; RV64ZBB-NEXT:    mv a0, s0
-; RV64ZBB-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
-; RV64ZBB-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
-; RV64ZBB-NEXT:    addi sp, sp, 16
-; RV64ZBB-NEXT:    tail bar_i32
-; RV64ZBB-NEXT:  .LBB50_2: # %bb2
-; RV64ZBB-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
-; RV64ZBB-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
-; RV64ZBB-NEXT:    addi sp, sp, 16
-; RV64ZBB-NEXT:    ret
+; RV64-LABEL: load_zext_nneg_sext_cse:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
+; RV64-NEXT:    lh s0, 0(a0)
+; RV64-NEXT:    bltz s0, .LBB50_2
+; RV64-NEXT:  # %bb.1: # %bb1
+; RV64-NEXT:    mv a0, s0
+; RV64-NEXT:    call bar_i16
+; RV64-NEXT:    mv a0, s0
+; RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    tail bar_i32
+; RV64-NEXT:  .LBB50_2: # %bb2
+; RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
   %load = load i16, ptr %p
   %zext = zext nneg i16 %load to i32
   %cmp = icmp sgt i16 %load, -1



More information about the llvm-commits mailing list