[llvm] [RISCV] Insert a freeze before converting select to AND/OR. (PR #84232)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 6 12:52:25 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-risc-v
Author: Craig Topper (topperc)
<details>
<summary>Changes</summary>
Select blocks poison, but AND/OR do not. We need to insert a freeze
to block poison propagation.
This creates suboptimal codegen, which I will try to fix with follow-up
patches. We should prioritize the correctness fix.
Fixes #84200.
---
Patch is 342.00 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/84232.diff
21 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+9-8)
- (modified) llvm/test/CodeGen/RISCV/alu64.ll (+2-1)
- (modified) llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll (+8-8)
- (modified) llvm/test/CodeGen/RISCV/bfloat-convert.ll (+96-73)
- (modified) llvm/test/CodeGen/RISCV/double-convert.ll (+75-62)
- (modified) llvm/test/CodeGen/RISCV/double-round-conv-sat.ll (+432-372)
- (modified) llvm/test/CodeGen/RISCV/float-convert.ll (+124-112)
- (modified) llvm/test/CodeGen/RISCV/float-round-conv-sat.ll (+324-312)
- (modified) llvm/test/CodeGen/RISCV/forced-atomics.ll (+2-1)
- (modified) llvm/test/CodeGen/RISCV/fpclamptosat.ll (+216-194)
- (modified) llvm/test/CodeGen/RISCV/half-convert.ll (+212-158)
- (modified) llvm/test/CodeGen/RISCV/half-round-conv-sat.ll (+690-666)
- (modified) llvm/test/CodeGen/RISCV/iabs.ll (+44-44)
- (added) llvm/test/CodeGen/RISCV/pr84200.ll (+22)
- (modified) llvm/test/CodeGen/RISCV/rv32zbb-zbkb.ll (+8-8)
- (modified) llvm/test/CodeGen/RISCV/rv32zbs.ll (+20-20)
- (modified) llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64xtheadbb.ll (+6-10)
- (modified) llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb.ll (+5-8)
- (modified) llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll (+306-311)
- (modified) llvm/test/CodeGen/RISCV/rvv/vec3-setcc-crash.ll (+13-13)
- (modified) llvm/test/CodeGen/RISCV/signed-truncation-check.ll (+6-3)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 4c3dc63afd878d..1efb6ff409379a 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -7246,25 +7246,25 @@ static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
// (select c, -1, y) -> -c | y
if (isAllOnesConstant(TrueV)) {
SDValue Neg = DAG.getNegative(CondV, DL, VT);
- return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
+ return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
}
// (select c, y, -1) -> (c-1) | y
if (isAllOnesConstant(FalseV)) {
SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
DAG.getAllOnesConstant(DL, VT));
- return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
+ return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
}
// (select c, 0, y) -> (c-1) & y
if (isNullConstant(TrueV)) {
SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
DAG.getAllOnesConstant(DL, VT));
- return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
+ return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
}
// (select c, y, 0) -> -c & y
if (isNullConstant(FalseV)) {
SDValue Neg = DAG.getNegative(CondV, DL, VT);
- return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
+ return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
}
}
@@ -7274,6 +7274,7 @@ static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
const APInt &FalseVal = FalseV->getAsAPIntVal();
if (~TrueVal == FalseVal) {
SDValue Neg = DAG.getNegative(CondV, DL, VT);
+ FalseV = DAG.getFreeze(FalseV);
return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
}
}
@@ -7289,14 +7290,14 @@ static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
// (select x, x, y) -> x | y
// (select !x, x, y) -> x & y
if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
- return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
- FalseV);
+ return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, DAG.getFreeze(TrueV),
+ DAG.getFreeze(FalseV));
}
// (select x, y, x) -> x & y
// (select !x, y, x) -> x | y
if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
- return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT, TrueV,
- FalseV);
+ return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT, DAG.getFreeze(TrueV),
+ DAG.getFreeze(FalseV));
}
}
diff --git a/llvm/test/CodeGen/RISCV/alu64.ll b/llvm/test/CodeGen/RISCV/alu64.ll
index f032756e007b68..e16f6abcca244c 100644
--- a/llvm/test/CodeGen/RISCV/alu64.ll
+++ b/llvm/test/CodeGen/RISCV/alu64.ll
@@ -58,7 +58,8 @@ define i64 @sltiu(i64 %a) nounwind {
; RV32I-LABEL: sltiu:
; RV32I: # %bb.0:
; RV32I-NEXT: sltiu a0, a0, 3
-; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: snez a1, a1
+; RV32I-NEXT: addi a1, a1, -1
; RV32I-NEXT: and a0, a1, a0
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll
index aa962d68fc5285..5914e45a153302 100644
--- a/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll
+++ b/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll
@@ -372,10 +372,10 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) {
; RV32IA-NEXT: # =>This Loop Header: Depth=1
; RV32IA-NEXT: # Child Loop BB2_3 Depth 2
; RV32IA-NEXT: mv a3, a2
-; RV32IA-NEXT: addi a2, a2, 1
-; RV32IA-NEXT: sltu a4, a3, a1
-; RV32IA-NEXT: neg a4, a4
-; RV32IA-NEXT: and a4, a4, a2
+; RV32IA-NEXT: addi a4, a2, 1
+; RV32IA-NEXT: sltu a2, a2, a1
+; RV32IA-NEXT: neg a2, a2
+; RV32IA-NEXT: and a4, a2, a4
; RV32IA-NEXT: .LBB2_3: # %atomicrmw.start
; RV32IA-NEXT: # Parent Loop BB2_1 Depth=1
; RV32IA-NEXT: # => This Inner Loop Header: Depth=2
@@ -607,10 +607,10 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) {
; RV64IA-NEXT: # =>This Loop Header: Depth=1
; RV64IA-NEXT: # Child Loop BB3_3 Depth 2
; RV64IA-NEXT: mv a3, a2
-; RV64IA-NEXT: addi a2, a2, 1
-; RV64IA-NEXT: sltu a4, a3, a1
-; RV64IA-NEXT: neg a4, a4
-; RV64IA-NEXT: and a4, a4, a2
+; RV64IA-NEXT: addi a4, a2, 1
+; RV64IA-NEXT: sltu a2, a2, a1
+; RV64IA-NEXT: neg a2, a2
+; RV64IA-NEXT: and a4, a2, a4
; RV64IA-NEXT: .LBB3_3: # %atomicrmw.start
; RV64IA-NEXT: # Parent Loop BB3_1 Depth=1
; RV64IA-NEXT: # => This Inner Loop Header: Depth=2
diff --git a/llvm/test/CodeGen/RISCV/bfloat-convert.ll b/llvm/test/CodeGen/RISCV/bfloat-convert.ll
index d533607ad54e38..0216d00be21854 100644
--- a/llvm/test/CodeGen/RISCV/bfloat-convert.ll
+++ b/llvm/test/CodeGen/RISCV/bfloat-convert.ll
@@ -456,121 +456,142 @@ define i64 @fcvt_l_bf16(bfloat %a) nounwind {
define i64 @fcvt_l_bf16_sat(bfloat %a) nounwind {
; RV32IZFBFMIN-LABEL: fcvt_l_bf16_sat:
; RV32IZFBFMIN: # %bb.0: # %start
-; RV32IZFBFMIN-NEXT: addi sp, sp, -16
-; RV32IZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IZFBFMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZFBFMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFBFMIN-NEXT: addi sp, sp, -32
+; RV32IZFBFMIN-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IZFBFMIN-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32IZFBFMIN-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32IZFBFMIN-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IZFBFMIN-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32IZFBFMIN-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill
+; RV32IZFBFMIN-NEXT: lui a0, %hi(.LCPI10_0)
+; RV32IZFBFMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a0)
; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fs0, fa0
+; RV32IZFBFMIN-NEXT: flt.s s0, fa5, fs0
+; RV32IZFBFMIN-NEXT: neg s1, s0
; RV32IZFBFMIN-NEXT: lui a0, 913408
; RV32IZFBFMIN-NEXT: fmv.w.x fa5, a0
-; RV32IZFBFMIN-NEXT: fle.s s0, fa5, fs0
+; RV32IZFBFMIN-NEXT: fle.s s2, fa5, fs0
+; RV32IZFBFMIN-NEXT: neg s3, s2
; RV32IZFBFMIN-NEXT: fmv.s fa0, fs0
; RV32IZFBFMIN-NEXT: call __fixsfdi
+; RV32IZFBFMIN-NEXT: and a0, s3, a0
+; RV32IZFBFMIN-NEXT: or a0, s1, a0
+; RV32IZFBFMIN-NEXT: feq.s a2, fs0, fs0
+; RV32IZFBFMIN-NEXT: neg a2, a2
; RV32IZFBFMIN-NEXT: lui a4, 524288
-; RV32IZFBFMIN-NEXT: lui a2, 524288
-; RV32IZFBFMIN-NEXT: beqz s0, .LBB10_2
+; RV32IZFBFMIN-NEXT: li a5, 1
+; RV32IZFBFMIN-NEXT: lui a3, 524288
+; RV32IZFBFMIN-NEXT: bne s2, a5, .LBB10_2
; RV32IZFBFMIN-NEXT: # %bb.1: # %start
-; RV32IZFBFMIN-NEXT: mv a2, a1
+; RV32IZFBFMIN-NEXT: mv a3, a1
; RV32IZFBFMIN-NEXT: .LBB10_2: # %start
-; RV32IZFBFMIN-NEXT: lui a1, %hi(.LCPI10_0)
-; RV32IZFBFMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a1)
-; RV32IZFBFMIN-NEXT: flt.s a3, fa5, fs0
-; RV32IZFBFMIN-NEXT: beqz a3, .LBB10_4
+; RV32IZFBFMIN-NEXT: and a0, a2, a0
+; RV32IZFBFMIN-NEXT: beqz s0, .LBB10_4
; RV32IZFBFMIN-NEXT: # %bb.3:
-; RV32IZFBFMIN-NEXT: addi a2, a4, -1
+; RV32IZFBFMIN-NEXT: addi a3, a4, -1
; RV32IZFBFMIN-NEXT: .LBB10_4: # %start
-; RV32IZFBFMIN-NEXT: feq.s a1, fs0, fs0
-; RV32IZFBFMIN-NEXT: neg a4, a1
-; RV32IZFBFMIN-NEXT: and a1, a4, a2
-; RV32IZFBFMIN-NEXT: neg a2, a3
-; RV32IZFBFMIN-NEXT: neg a3, s0
-; RV32IZFBFMIN-NEXT: and a0, a3, a0
-; RV32IZFBFMIN-NEXT: or a0, a2, a0
-; RV32IZFBFMIN-NEXT: and a0, a4, a0
-; RV32IZFBFMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IZFBFMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IZFBFMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
-; RV32IZFBFMIN-NEXT: addi sp, sp, 16
+; RV32IZFBFMIN-NEXT: and a1, a2, a3
+; RV32IZFBFMIN-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IZFBFMIN-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32IZFBFMIN-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IZFBFMIN-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32IZFBFMIN-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32IZFBFMIN-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload
+; RV32IZFBFMIN-NEXT: addi sp, sp, 32
; RV32IZFBFMIN-NEXT: ret
;
; R32IDZFBFMIN-LABEL: fcvt_l_bf16_sat:
; R32IDZFBFMIN: # %bb.0: # %start
-; R32IDZFBFMIN-NEXT: addi sp, sp, -16
-; R32IDZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; R32IDZFBFMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; R32IDZFBFMIN-NEXT: addi sp, sp, -32
+; R32IDZFBFMIN-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; R32IDZFBFMIN-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; R32IDZFBFMIN-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; R32IDZFBFMIN-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; R32IDZFBFMIN-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; R32IDZFBFMIN-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill
+; R32IDZFBFMIN-NEXT: lui a0, %hi(.LCPI10_0)
+; R32IDZFBFMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a0)
; R32IDZFBFMIN-NEXT: fcvt.s.bf16 fs0, fa0
+; R32IDZFBFMIN-NEXT: flt.s s0, fa5, fs0
+; R32IDZFBFMIN-NEXT: neg s1, s0
; R32IDZFBFMIN-NEXT: lui a0, 913408
; R32IDZFBFMIN-NEXT: fmv.w.x fa5, a0
-; R32IDZFBFMIN-NEXT: fle.s s0, fa5, fs0
+; R32IDZFBFMIN-NEXT: fle.s s2, fa5, fs0
+; R32IDZFBFMIN-NEXT: neg s3, s2
; R32IDZFBFMIN-NEXT: fmv.s fa0, fs0
; R32IDZFBFMIN-NEXT: call __fixsfdi
+; R32IDZFBFMIN-NEXT: and a0, s3, a0
+; R32IDZFBFMIN-NEXT: or a0, s1, a0
+; R32IDZFBFMIN-NEXT: feq.s a2, fs0, fs0
+; R32IDZFBFMIN-NEXT: neg a2, a2
; R32IDZFBFMIN-NEXT: lui a4, 524288
-; R32IDZFBFMIN-NEXT: lui a2, 524288
-; R32IDZFBFMIN-NEXT: beqz s0, .LBB10_2
+; R32IDZFBFMIN-NEXT: li a5, 1
+; R32IDZFBFMIN-NEXT: lui a3, 524288
+; R32IDZFBFMIN-NEXT: bne s2, a5, .LBB10_2
; R32IDZFBFMIN-NEXT: # %bb.1: # %start
-; R32IDZFBFMIN-NEXT: mv a2, a1
+; R32IDZFBFMIN-NEXT: mv a3, a1
; R32IDZFBFMIN-NEXT: .LBB10_2: # %start
-; R32IDZFBFMIN-NEXT: lui a1, %hi(.LCPI10_0)
-; R32IDZFBFMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a1)
-; R32IDZFBFMIN-NEXT: flt.s a3, fa5, fs0
-; R32IDZFBFMIN-NEXT: beqz a3, .LBB10_4
+; R32IDZFBFMIN-NEXT: and a0, a2, a0
+; R32IDZFBFMIN-NEXT: beqz s0, .LBB10_4
; R32IDZFBFMIN-NEXT: # %bb.3:
-; R32IDZFBFMIN-NEXT: addi a2, a4, -1
+; R32IDZFBFMIN-NEXT: addi a3, a4, -1
; R32IDZFBFMIN-NEXT: .LBB10_4: # %start
-; R32IDZFBFMIN-NEXT: feq.s a1, fs0, fs0
-; R32IDZFBFMIN-NEXT: neg a4, a1
-; R32IDZFBFMIN-NEXT: and a1, a4, a2
-; R32IDZFBFMIN-NEXT: neg a2, a3
-; R32IDZFBFMIN-NEXT: neg a3, s0
-; R32IDZFBFMIN-NEXT: and a0, a3, a0
-; R32IDZFBFMIN-NEXT: or a0, a2, a0
-; R32IDZFBFMIN-NEXT: and a0, a4, a0
-; R32IDZFBFMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; R32IDZFBFMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; R32IDZFBFMIN-NEXT: and a1, a2, a3
+; R32IDZFBFMIN-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; R32IDZFBFMIN-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; R32IDZFBFMIN-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; R32IDZFBFMIN-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; R32IDZFBFMIN-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; R32IDZFBFMIN-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
-; R32IDZFBFMIN-NEXT: addi sp, sp, 16
+; R32IDZFBFMIN-NEXT: addi sp, sp, 32
; R32IDZFBFMIN-NEXT: ret
;
; RV32ID-LABEL: fcvt_l_bf16_sat:
; RV32ID: # %bb.0: # %start
-; RV32ID-NEXT: addi sp, sp, -16
-; RV32ID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32ID-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32ID-NEXT: addi sp, sp, -32
+; RV32ID-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32ID-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32ID-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32ID-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32ID-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32ID-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill
+; RV32ID-NEXT: lui a0, %hi(.LCPI10_0)
+; RV32ID-NEXT: flw fa5, %lo(.LCPI10_0)(a0)
; RV32ID-NEXT: fmv.x.w a0, fa0
; RV32ID-NEXT: slli a0, a0, 16
; RV32ID-NEXT: fmv.w.x fs0, a0
+; RV32ID-NEXT: flt.s s0, fa5, fs0
+; RV32ID-NEXT: neg s1, s0
; RV32ID-NEXT: lui a0, 913408
; RV32ID-NEXT: fmv.w.x fa5, a0
-; RV32ID-NEXT: fle.s s0, fa5, fs0
+; RV32ID-NEXT: fle.s s2, fa5, fs0
+; RV32ID-NEXT: neg s3, s2
; RV32ID-NEXT: fmv.s fa0, fs0
; RV32ID-NEXT: call __fixsfdi
+; RV32ID-NEXT: and a0, s3, a0
+; RV32ID-NEXT: or a0, s1, a0
+; RV32ID-NEXT: feq.s a2, fs0, fs0
+; RV32ID-NEXT: neg a2, a2
; RV32ID-NEXT: lui a4, 524288
-; RV32ID-NEXT: lui a2, 524288
-; RV32ID-NEXT: beqz s0, .LBB10_2
+; RV32ID-NEXT: li a5, 1
+; RV32ID-NEXT: lui a3, 524288
+; RV32ID-NEXT: bne s2, a5, .LBB10_2
; RV32ID-NEXT: # %bb.1: # %start
-; RV32ID-NEXT: mv a2, a1
+; RV32ID-NEXT: mv a3, a1
; RV32ID-NEXT: .LBB10_2: # %start
-; RV32ID-NEXT: lui a1, %hi(.LCPI10_0)
-; RV32ID-NEXT: flw fa5, %lo(.LCPI10_0)(a1)
-; RV32ID-NEXT: flt.s a3, fa5, fs0
-; RV32ID-NEXT: beqz a3, .LBB10_4
+; RV32ID-NEXT: and a0, a2, a0
+; RV32ID-NEXT: beqz s0, .LBB10_4
; RV32ID-NEXT: # %bb.3:
-; RV32ID-NEXT: addi a2, a4, -1
+; RV32ID-NEXT: addi a3, a4, -1
; RV32ID-NEXT: .LBB10_4: # %start
-; RV32ID-NEXT: feq.s a1, fs0, fs0
-; RV32ID-NEXT: neg a4, a1
-; RV32ID-NEXT: and a1, a4, a2
-; RV32ID-NEXT: neg a2, a3
-; RV32ID-NEXT: neg a3, s0
-; RV32ID-NEXT: and a0, a3, a0
-; RV32ID-NEXT: or a0, a2, a0
-; RV32ID-NEXT: and a0, a4, a0
-; RV32ID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32ID-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32ID-NEXT: and a1, a2, a3
+; RV32ID-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32ID-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32ID-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32ID-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32ID-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32ID-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
-; RV32ID-NEXT: addi sp, sp, 16
+; RV32ID-NEXT: addi sp, sp, 32
; RV32ID-NEXT: ret
;
; CHECK64ZFBFMIN-LABEL: fcvt_l_bf16_sat:
@@ -654,7 +675,8 @@ define i64 @fcvt_lu_bf16_sat(bfloat %a) nounwind {
; CHECK32ZFBFMIN-NEXT: neg s0, a0
; CHECK32ZFBFMIN-NEXT: fmv.w.x fa5, zero
; CHECK32ZFBFMIN-NEXT: fle.s a0, fa5, fa0
-; CHECK32ZFBFMIN-NEXT: neg s1, a0
+; CHECK32ZFBFMIN-NEXT: xori a0, a0, 1
+; CHECK32ZFBFMIN-NEXT: addi s1, a0, -1
; CHECK32ZFBFMIN-NEXT: call __fixunssfdi
; CHECK32ZFBFMIN-NEXT: and a0, s1, a0
; CHECK32ZFBFMIN-NEXT: or a0, s0, a0
@@ -681,7 +703,8 @@ define i64 @fcvt_lu_bf16_sat(bfloat %a) nounwind {
; RV32ID-NEXT: neg s0, a0
; RV32ID-NEXT: fmv.w.x fa5, zero
; RV32ID-NEXT: fle.s a0, fa5, fa0
-; RV32ID-NEXT: neg s1, a0
+; RV32ID-NEXT: xori a0, a0, 1
+; RV32ID-NEXT: addi s1, a0, -1
; RV32ID-NEXT: call __fixunssfdi
; RV32ID-NEXT: and a0, s1, a0
; RV32ID-NEXT: or a0, s0, a0
diff --git a/llvm/test/CodeGen/RISCV/double-convert.ll b/llvm/test/CodeGen/RISCV/double-convert.ll
index eb8ffe75ef7697..f2e37f55521bac 100644
--- a/llvm/test/CodeGen/RISCV/double-convert.ll
+++ b/llvm/test/CodeGen/RISCV/double-convert.ll
@@ -749,40 +749,47 @@ define i64 @fcvt_l_d(double %a) nounwind {
define i64 @fcvt_l_d_sat(double %a) nounwind {
; RV32IFD-LABEL: fcvt_l_d_sat:
; RV32IFD: # %bb.0: # %start
-; RV32IFD-NEXT: addi sp, sp, -16
-; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IFD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: addi sp, sp, -32
+; RV32IFD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill
; RV32IFD-NEXT: lui a0, %hi(.LCPI12_0)
; RV32IFD-NEXT: fld fa5, %lo(.LCPI12_0)(a0)
+; RV32IFD-NEXT: lui a0, %hi(.LCPI12_1)
+; RV32IFD-NEXT: fld fa4, %lo(.LCPI12_1)(a0)
; RV32IFD-NEXT: fmv.d fs0, fa0
-; RV32IFD-NEXT: fle.d s0, fa5, fa0
+; RV32IFD-NEXT: flt.d s0, fa5, fa0
+; RV32IFD-NEXT: neg s1, s0
+; RV32IFD-NEXT: fle.d s2, fa4, fa0
+; RV32IFD-NEXT: neg s3, s2
; RV32IFD-NEXT: call __fixdfdi
+; RV32IFD-NEXT: and a0, s3, a0
+; RV32IFD-NEXT: or a0, s1, a0
+; RV32IFD-NEXT: feq.d a2, fs0, fs0
+; RV32IFD-NEXT: neg a2, a2
; RV32IFD-NEXT: lui a4, 524288
-; RV32IFD-NEXT: lui a2, 524288
-; RV32IFD-NEXT: beqz s0, .LBB12_2
+; RV32IFD-NEXT: li a5, 1
+; RV32IFD-NEXT: lui a3, 524288
+; RV32IFD-NEXT: bne s2, a5, .LBB12_2
; RV32IFD-NEXT: # %bb.1: # %start
-; RV32IFD-NEXT: mv a2, a1
+; RV32IFD-NEXT: mv a3, a1
; RV32IFD-NEXT: .LBB12_2: # %start
-; RV32IFD-NEXT: lui a1, %hi(.LCPI12_1)
-; RV32IFD-NEXT: fld fa5, %lo(.LCPI12_1)(a1)
-; RV32IFD-NEXT: flt.d a3, fa5, fs0
-; RV32IFD-NEXT: beqz a3, .LBB12_4
+; RV32IFD-NEXT: and a0, a2, a0
+; RV32IFD-NEXT: beqz s0, .LBB12_4
; RV32IFD-NEXT: # %bb.3:
-; RV32IFD-NEXT: addi a2, a4, -1
+; RV32IFD-NEXT: addi a3, a4, -1
; RV32IFD-NEXT: .LBB12_4: # %start
-; RV32IFD-NEXT: feq.d a1, fs0, fs0
-; RV32IFD-NEXT: neg a4, a1
-; RV32IFD-NEXT: and a1, a4, a2
-; RV32IFD-NEXT: neg a2, a3
-; RV32IFD-NEXT: neg a3, s0
-; RV32IFD-NEXT: and a0, a3, a0
-; RV32IFD-NEXT: or a0, a2, a0
-; RV32IFD-NEXT: and a0, a4, a0
-; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: and a1, a2, a3
+; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
-; RV32IFD-NEXT: addi sp, sp, 16
+; RV32IFD-NEXT: addi sp, sp, 32
; RV32IFD-NEXT: ret
;
; RV64IFD-LABEL: fcvt_l_d_sat:
@@ -800,40 +807,45 @@ define i64 @fcvt_l_d_sat(double %a) nounwind {
; RV32IZFINXZDINX-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32IZFINXZDINX-NEXT: sw a0, 8(sp)
-; RV32IZFINXZDINX-NEXT: sw a1, 12(sp)
-; RV32IZFINXZDINX-NEXT: lw s0, 8(sp)
-; RV32IZFINXZDINX-NEXT: lw s1, 12(sp)
-; RV32IZFINXZDINX-NEXT: call __fixdfdi
+; RV32IZFINXZDINX-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT: sw a0, 0(sp)
+; RV32IZFINXZDINX-NEXT: sw a1, 4(sp)
+; RV32IZFINXZDINX-NEXT: lw s0, 0(sp)
+; RV32IZFINXZDINX-NEXT: lw s1, 4(sp)
; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI12_0)
; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI12_0+4)(a2)
; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI12_0)(a2)
-; RV32IZFINXZDINX-NEXT: fle.d a2, a2, s0
+; RV32IZFINXZDINX-NEXT: fle.d s2, a2, s0
+; RV32IZFINXZDINX-NEXT: neg s3, s2
+; RV32IZFINXZDINX-NEXT: call __fixdfdi
+; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI12_1)
+; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI12_1+4)(a2)
+; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI12_1)(a2)
+; RV32IZFINXZDINX-NEXT: and a0, s3, a0
+; RV32IZFINXZDINX-NEXT: flt.d a3, a2, s0
+; RV32IZFINXZDINX-NEXT: neg a2, a3
+; RV32IZFINXZDINX-NEXT: or a0, a2, a0
+; RV32IZFINXZDINX-NEXT: feq.d a2, s0, s0
+; RV32IZFINXZDINX-NEXT: neg a2, a2
; RV32IZFINXZDINX-NEXT: lui a5, 524288
-; RV32IZFINXZDINX-NEXT: lui a3, 524288
-; RV32IZFINXZDINX-NEXT: beqz a2, .LBB12_2
+; RV32IZFINXZDINX-NEXT: li a6, 1
+; RV32IZFINXZDINX-NEXT: lui a4, 524288
+; RV32IZFINXZDINX-NEXT: bne s2, a6, .LBB12_2
; RV32IZFINXZDINX-NEXT: # %bb.1: # %start
-; RV32IZFINXZDINX-NEXT: mv a3, a1
+; RV32IZFINXZDINX-NEXT: mv a4, a1
; RV32IZFINXZDINX-NEXT: .LBB12_2: # %start
-; RV32IZFINXZDINX-NEXT: lui a1, %hi(.LCPI12_1)
-; RV32IZFINXZDINX-NEXT: lw a6, %lo(.LCPI12_1)(a1)
-; RV32IZFINXZDINX-NEXT: lw a7, %lo(.LCPI12_1+4)(a1)
-; RV32IZFINXZDINX-NEXT: flt.d a4, a6, s0
-; RV32IZFINXZDINX-NEXT: beqz a4, .LBB12_4
+; RV32IZFINX...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/84232
More information about the llvm-commits
mailing list