[llvm] [DAG] Add freeze(assertext(x)) -> assertext(x) folds (PR #94491)

Wed Jun 5 12:03:54 PDT 2024

https://github.com/bjope updated https://github.com/llvm/llvm-project/pull/94491

>From e2921182b13d71d76ccdb19463afb0399e74e78f Mon Sep 17 00:00:00 2001
From: Bjorn Pettersson <bjorn.a.pettersson at ericsson.com>
Date: Wed, 5 Jun 2024 14:55:33 +0200
Subject: [PATCH 1/2] [DAG] Add freeze(assertext(x)) -> assertext(x) folds

This is a new attempt to allow pushing freeze through
AssertSext/AssertZext in some situations.

An earlier attempt tried to solve this by handling
AssertSext/AssertZext in canCreateUndefOrPoison. Simply saying that
result of the assert nodes only are poison/undef if the input is
poison/undef. That patch was reverted in
  https://github.com/llvm/llvm-project/commit/8505c3b15bfc535ff6624e71add4082680745187

What happened was that we could end up doing folds such as
  freeze(assertzext(x)) -> assertzext(freeze(x))
which isn't sound for operations that may "create" poison, unless
dropping "poision generating flags" from the operation that is pulled
out from the freeze. That is needed to guarantee that the result
isn't suddenly more poisonnous (the contraints on the result given by
the flags might not hold if the input is poison, so they need to be
dropped).
The same thing applies to assertSext/assertZext nodes (at least as
we say that they may create poison). For a poisoned input the
constraints given by the assertSext/assertZext node isn't fulfilled.
So we basically need to "drop" the knowledge given by the assert if
pulling assert nodes through freeze, or else the result must be
treated as still maybe being poison (making the fold invalid).

This patch is limiting the
  freeze(assertzext(x)) -> assertzext(freeze(x))
fold to the situation when x is known not to be undef/poison. That
means that we actually can drop the freeze, so the new DAG combine
in this patch is actually doing
  freeze(assertzext(x)) -> assertzext(x)

For the above to be sound the documentation of AssertSext/AssertZext
are updated to highlight that it is considered as immediate UB if
the input isn't satisfying the asserted condition (unless it is
poison).
---
 llvm/include/llvm/CodeGen/ISDOpcodes.h        |  5 ++++-
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 14 +++++++++++++
 llvm/test/CodeGen/RISCV/float-convert.ll      | 20 +++++++++----------
 llvm/test/CodeGen/RISCV/rv64zbb.ll            |  2 +-
 4 files changed, 28 insertions(+), 13 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index 0f87e062e2da6..af4f39d77894f 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -57,7 +57,10 @@ enum NodeType {
   /// been extended, and the second is a value type node indicating the width
   /// of the extension.
   /// NOTE: In case of the source value (or any vector element value) is
-  /// poisoned the assertion will not be true for that value.
+  /// poisoned the assertion will not be true for that value and the
+  /// corresponding result value will be poison. If a source value isn't
+  /// satisfying the condition being asserted (while not being poison), then
+  /// this is considered as immediate undefined behavior.
   AssertSext,
   AssertZext,
 
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 5148b7258257f..f0e65251b80f1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -15592,6 +15592,20 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) {
   if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false))
     return N0;
 
+  // AssertSext/AssertZext nodes are a bit special and need custom
+  // hendling. They are used in contexts when the result is poison only if the
+  // input is poison, but they are still modelled as "canCreateUndefOrPoison" as
+  // the value range for the result is constrained by assumptions that wouldn't
+  // hold if the input is poison (and we can't drop poison-generating flags for
+  // AssertSext/AssertZext in the regular fold below).
+  if (N0.getOpcode() == ISD::AssertSext || N0.getOpcode() == ISD::AssertZext) {
+    // Fold freeze(assertext(x, ...)) -> assertext(x, ...),
+    // iff x is known not to be poison/undef.
+    if (DAG.isGuaranteedNotToBeUndefOrPoison(N0.getOperand(0),
+                                             /*PoisonOnly*/ false))
+      return N0;
+  }
+
   // We currently avoid folding freeze over SRA/SRL, due to the problems seen
   // with (freeze (assert ext)) blocking simplifications of SRA/SRL. See for
   // example https://reviews.llvm.org/D136529#4120959.
diff --git a/llvm/test/CodeGen/RISCV/float-convert.ll b/llvm/test/CodeGen/RISCV/float-convert.ll
index 7eabd3f5f2273..7ce78a8d2529f 100644
--- a/llvm/test/CodeGen/RISCV/float-convert.ll
+++ b/llvm/test/CodeGen/RISCV/float-convert.ll
@@ -966,26 +966,24 @@ define i64 @fcvt_lu_s_sat(float %a) nounwind {
 ; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
-; RV64I-NEXT:    lui a1, 391168
-; RV64I-NEXT:    addiw a1, a1, -1
-; RV64I-NEXT:    call __gtsf2
-; RV64I-NEXT:    sgtz a0, a0
-; RV64I-NEXT:    neg s1, a0
-; RV64I-NEXT:    mv a0, s0
 ; RV64I-NEXT:    li a1, 0
 ; RV64I-NEXT:    call __gesf2
 ; RV64I-NEXT:    slti a0, a0, 0
-; RV64I-NEXT:    addi s2, a0, -1
+; RV64I-NEXT:    addi s1, a0, -1
 ; RV64I-NEXT:    mv a0, s0
 ; RV64I-NEXT:    call __fixunssfdi
-; RV64I-NEXT:    and a0, s2, a0
-; RV64I-NEXT:    or a0, s1, a0
+; RV64I-NEXT:    and s1, s1, a0
+; RV64I-NEXT:    lui a1, 391168
+; RV64I-NEXT:    addiw a1, a1, -1
+; RV64I-NEXT:    mv a0, s0
+; RV64I-NEXT:    call __gtsf2
+; RV64I-NEXT:    sgtz a0, a0
+; RV64I-NEXT:    neg a0, a0
+; RV64I-NEXT:    or a0, a0, s1
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
 start:
diff --git a/llvm/test/CodeGen/RISCV/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64zbb.ll
index 4d5ef5db86057..dfbefbe27efc1 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbb.ll
@@ -1336,7 +1336,7 @@ define i32 @abs_i32(i32 %x) {
 define signext i32 @abs_i32_sext(i32 signext %x) {
 ; RV64I-LABEL: abs_i32_sext:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    sraiw a1, a0, 31
+; RV64I-NEXT:    srai a1, a0, 31
 ; RV64I-NEXT:    xor a0, a0, a1
 ; RV64I-NEXT:    subw a0, a0, a1
 ; RV64I-NEXT:    ret

>From fc078305172609a2b12d2f7eca3dcd0c7562d910 Mon Sep 17 00:00:00 2001
From: Bjorn Pettersson <bjorn.a.pettersson at ericsson.com>
Date: Wed, 5 Jun 2024 21:03:21 +0200
Subject: [PATCH 2/2] fixup: Move the logic to isGuaranteedNotToBeUndefOrPoison

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp  | 14 --------------
 llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp |  5 +++++
 llvm/test/CodeGen/RISCV/rv64xtheadbb.ll        | 12 +++++++-----
 llvm/test/CodeGen/RISCV/rv64zbb.ll             | 12 +++++++-----
 4 files changed, 19 insertions(+), 24 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index f0e65251b80f1..5148b7258257f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -15592,20 +15592,6 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) {
   if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false))
     return N0;
 
-  // AssertSext/AssertZext nodes are a bit special and need custom
-  // hendling. They are used in contexts when the result is poison only if the
-  // input is poison, but they are still modelled as "canCreateUndefOrPoison" as
-  // the value range for the result is constrained by assumptions that wouldn't
-  // hold if the input is poison (and we can't drop poison-generating flags for
-  // AssertSext/AssertZext in the regular fold below).
-  if (N0.getOpcode() == ISD::AssertSext || N0.getOpcode() == ISD::AssertZext) {
-    // Fold freeze(assertext(x, ...)) -> assertext(x, ...),
-    // iff x is known not to be poison/undef.
-    if (DAG.isGuaranteedNotToBeUndefOrPoison(N0.getOperand(0),
-                                             /*PoisonOnly*/ false))
-      return N0;
-  }
-
   // We currently avoid folding freeze over SRA/SRL, due to the problems seen
   // with (freeze (assert ext)) blocking simplifications of SRA/SRL. See for
   // example https://reviews.llvm.org/D136529#4120959.
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 4a6a431696b52..34a1ddf2be0e2 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5176,6 +5176,11 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op,
     return true;
   }
 
+  case ISD::AssertSext:
+  case ISD::AssertZext:
+    return isGuaranteedNotToBeUndefOrPoison(Op.getOperand(0), DemandedElts,
+                                            PoisonOnly, Depth + 1);
+
     // TODO: Search for noundef attributes from library functions.
 
     // TODO: Pointers dereferenced by ISD::LOAD/STORE ops are noundef.
diff --git a/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll b/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll
index 6cdab888ffcde..cba623cd24f87 100644
--- a/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll
@@ -239,15 +239,17 @@ define i32 @ctlz_lshr_i32(i32 signext %a) {
 ; RV64I-NEXT:    srliw a0, a0, 1
 ; RV64I-NEXT:    beqz a0, .LBB4_2
 ; RV64I-NEXT:  # %bb.1: # %cond.false
-; RV64I-NEXT:    srliw a1, a0, 1
+; RV64I-NEXT:    srli a1, a0, 1
 ; RV64I-NEXT:    or a0, a0, a1
-; RV64I-NEXT:    srliw a1, a0, 2
+; RV64I-NEXT:    srli a1, a0, 2
 ; RV64I-NEXT:    or a0, a0, a1
-; RV64I-NEXT:    srliw a1, a0, 4
+; RV64I-NEXT:    srli a1, a0, 4
 ; RV64I-NEXT:    or a0, a0, a1
-; RV64I-NEXT:    srliw a1, a0, 8
+; RV64I-NEXT:    slli a1, a0, 33
+; RV64I-NEXT:    srli a1, a1, 41
 ; RV64I-NEXT:    or a0, a0, a1
-; RV64I-NEXT:    srliw a1, a0, 16
+; RV64I-NEXT:    slli a1, a0, 33
+; RV64I-NEXT:    srli a1, a1, 49
 ; RV64I-NEXT:    or a0, a0, a1
 ; RV64I-NEXT:    not a0, a0
 ; RV64I-NEXT:    srli a1, a0, 1
diff --git a/llvm/test/CodeGen/RISCV/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64zbb.ll
index dfbefbe27efc1..ca94dac21b0a9 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbb.ll
@@ -231,15 +231,17 @@ define i32 @ctlz_lshr_i32(i32 signext %a) {
 ; RV64I-NEXT:    srliw a0, a0, 1
 ; RV64I-NEXT:    beqz a0, .LBB4_2
 ; RV64I-NEXT:  # %bb.1: # %cond.false
-; RV64I-NEXT:    srliw a1, a0, 1
+; RV64I-NEXT:    srli a1, a0, 1
 ; RV64I-NEXT:    or a0, a0, a1
-; RV64I-NEXT:    srliw a1, a0, 2
+; RV64I-NEXT:    srli a1, a0, 2
 ; RV64I-NEXT:    or a0, a0, a1
-; RV64I-NEXT:    srliw a1, a0, 4
+; RV64I-NEXT:    srli a1, a0, 4
 ; RV64I-NEXT:    or a0, a0, a1
-; RV64I-NEXT:    srliw a1, a0, 8
+; RV64I-NEXT:    slli a1, a0, 33
+; RV64I-NEXT:    srli a1, a1, 41
 ; RV64I-NEXT:    or a0, a0, a1
-; RV64I-NEXT:    srliw a1, a0, 16
+; RV64I-NEXT:    slli a1, a0, 33
+; RV64I-NEXT:    srli a1, a1, 49
 ; RV64I-NEXT:    or a0, a0, a1
 ; RV64I-NEXT:    not a0, a0
 ; RV64I-NEXT:    srli a1, a0, 1