[llvm] 5eb24fd - [SelectionDAG][RISCV] Preserve nneg flag when folding (trunc (zext X))->(zext X). (#144807)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 19 08:06:11 PDT 2025
Author: Craig Topper
Date: 2025-06-19T08:06:07-07:00
New Revision: 5eb24fde11cd82a08f208509f80f428da90c89c9
URL: https://github.com/llvm/llvm-project/commit/5eb24fde11cd82a08f208509f80f428da90c89c9
DIFF: https://github.com/llvm/llvm-project/commit/5eb24fde11cd82a08f208509f80f428da90c89c9.diff
LOG: [SelectionDAG][RISCV] Preserve nneg flag when folding (trunc (zext X))->(zext X). (#144807)
If X is known non-negative, that's still true if we fold the truncate
to create a smaller zext.
In the i128 tests, SelectionDAGBuilder aggressively truncates the
`zext nneg` to i64 to match `getShiftAmountTy`. If we don't preserve
the `nneg`, we can't see that the `signext` shift amount argument
means we don't need to do any extension at all.
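As a minimal, hypothetical sketch (not one of the committed tests) of the
pattern this combine handles: if the IR itself contains a truncate of a
`zext nneg`, the same fold applies while building the DAG, and with the flag
preserved an RV64 target can use the `signext` argument directly:

    define i64 @shamt_demo(i64 %a, i32 signext %b) nounwind {
      %ext = zext nneg i32 %b to i128   ; shift amount known non-negative
      %amt = trunc i128 %ext to i64     ; folds to `zext nneg i32 %b to i64`
      %res = lshr i64 %a, %amt          ; RV64 can select a plain srl
      ret i64 %res
    }

In the committed i128 tests the truncate is introduced by SelectionDAGBuilder
itself rather than written in the IR, but the fold and the flag propagation
are the same.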
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
llvm/test/CodeGen/RISCV/shifts.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 0e078f9dd88b4..a6b9cc81edde6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -15740,8 +15740,12 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
N0.getOpcode() == ISD::SIGN_EXTEND ||
N0.getOpcode() == ISD::ANY_EXTEND) {
// if the source is smaller than the dest, we still need an extend.
- if (N0.getOperand(0).getValueType().bitsLT(VT))
- return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0));
+ if (N0.getOperand(0).getValueType().bitsLT(VT)) {
+ SDNodeFlags Flags;
+ if (N0.getOpcode() == ISD::ZERO_EXTEND)
+ Flags.setNonNeg(N0->getFlags().hasNonNeg());
+ return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Flags);
+ }
// if the source is larger than the dest, than we just need the truncate.
if (N0.getOperand(0).getValueType().bitsGT(VT))
return DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index b0e3f534e2aaa..5d8db8be9731f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -6474,8 +6474,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
OpOpcode == ISD::ANY_EXTEND) {
// If the source is smaller than the dest, we still need an extend.
if (N1.getOperand(0).getValueType().getScalarType().bitsLT(
- VT.getScalarType()))
- return getNode(OpOpcode, DL, VT, N1.getOperand(0));
+ VT.getScalarType())) {
+ SDNodeFlags Flags;
+ if (OpOpcode == ISD::ZERO_EXTEND)
+ Flags.setNonNeg(N1->getFlags().hasNonNeg());
+ return getNode(OpOpcode, DL, VT, N1.getOperand(0), Flags);
+ }
if (N1.getOperand(0).getValueType().bitsGT(VT))
return getNode(ISD::TRUNCATE, DL, VT, N1.getOperand(0));
return N1.getOperand(0);
diff --git a/llvm/test/CodeGen/RISCV/shifts.ll b/llvm/test/CodeGen/RISCV/shifts.ll
index 249dabba0cc28..32a037918a5a7 100644
--- a/llvm/test/CodeGen/RISCV/shifts.ll
+++ b/llvm/test/CodeGen/RISCV/shifts.ll
@@ -484,3 +484,298 @@ define i128 @fshr128_minsize(i128 %a, i128 %b) minsize nounwind {
%res = tail call i128 @llvm.fshr.i128(i128 %a, i128 %a, i128 %b)
ret i128 %res
}
+
+define i64 @lshr64_shamt32(i64 %a, i32 signext %b) nounwind {
+; RV32I-LABEL: lshr64_shamt32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi a4, a2, -32
+; RV32I-NEXT: srl a3, a1, a2
+; RV32I-NEXT: bltz a4, .LBB11_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: mv a0, a3
+; RV32I-NEXT: j .LBB11_3
+; RV32I-NEXT: .LBB11_2:
+; RV32I-NEXT: srl a0, a0, a2
+; RV32I-NEXT: not a2, a2
+; RV32I-NEXT: slli a1, a1, 1
+; RV32I-NEXT: sll a1, a1, a2
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: .LBB11_3:
+; RV32I-NEXT: srai a1, a4, 31
+; RV32I-NEXT: and a1, a1, a3
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: lshr64_shamt32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: srl a0, a0, a1
+; RV64I-NEXT: ret
+ %zext = zext nneg i32 %b to i64
+ %1 = lshr i64 %a, %zext
+ ret i64 %1
+}
+
+define i64 @ashr64_shamt32(i64 %a, i32 signext %b) nounwind {
+; RV32I-LABEL: ashr64_shamt32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: mv a3, a1
+; RV32I-NEXT: addi a4, a2, -32
+; RV32I-NEXT: sra a1, a1, a2
+; RV32I-NEXT: bltz a4, .LBB12_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: srai a3, a3, 31
+; RV32I-NEXT: mv a0, a1
+; RV32I-NEXT: mv a1, a3
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB12_2:
+; RV32I-NEXT: srl a0, a0, a2
+; RV32I-NEXT: not a2, a2
+; RV32I-NEXT: slli a3, a3, 1
+; RV32I-NEXT: sll a2, a3, a2
+; RV32I-NEXT: or a0, a0, a2
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: ashr64_shamt32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: sra a0, a0, a1
+; RV64I-NEXT: ret
+ %zext = zext nneg i32 %b to i64
+ %1 = ashr i64 %a, %zext
+ ret i64 %1
+}
+
+define i64 @shl64_shamt32(i64 %a, i32 signext %b) nounwind {
+; RV32I-LABEL: shl64_shamt32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi a4, a2, -32
+; RV32I-NEXT: sll a3, a0, a2
+; RV32I-NEXT: bltz a4, .LBB13_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: mv a1, a3
+; RV32I-NEXT: j .LBB13_3
+; RV32I-NEXT: .LBB13_2:
+; RV32I-NEXT: sll a1, a1, a2
+; RV32I-NEXT: not a2, a2
+; RV32I-NEXT: srli a0, a0, 1
+; RV32I-NEXT: srl a0, a0, a2
+; RV32I-NEXT: or a1, a1, a0
+; RV32I-NEXT: .LBB13_3:
+; RV32I-NEXT: srai a0, a4, 31
+; RV32I-NEXT: and a0, a0, a3
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: shl64_shamt32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: sll a0, a0, a1
+; RV64I-NEXT: ret
+ %zext = zext nneg i32 %b to i64
+ %1 = shl i64 %a, %zext
+ ret i64 %1
+}
+
+define i128 @lshr128_shamt32(i128 %a, i32 signext %b) nounwind {
+; RV32I-LABEL: lshr128_shamt32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: lw a3, 0(a1)
+; RV32I-NEXT: lw a4, 4(a1)
+; RV32I-NEXT: lw a5, 8(a1)
+; RV32I-NEXT: lw a1, 12(a1)
+; RV32I-NEXT: sw zero, 16(sp)
+; RV32I-NEXT: sw zero, 20(sp)
+; RV32I-NEXT: sw zero, 24(sp)
+; RV32I-NEXT: sw zero, 28(sp)
+; RV32I-NEXT: srli a6, a2, 3
+; RV32I-NEXT: mv a7, sp
+; RV32I-NEXT: andi t0, a2, 31
+; RV32I-NEXT: andi a6, a6, 12
+; RV32I-NEXT: xori t0, t0, 31
+; RV32I-NEXT: add a6, a7, a6
+; RV32I-NEXT: sw a3, 0(sp)
+; RV32I-NEXT: sw a4, 4(sp)
+; RV32I-NEXT: sw a5, 8(sp)
+; RV32I-NEXT: sw a1, 12(sp)
+; RV32I-NEXT: lw a1, 0(a6)
+; RV32I-NEXT: lw a3, 4(a6)
+; RV32I-NEXT: lw a4, 8(a6)
+; RV32I-NEXT: lw a5, 12(a6)
+; RV32I-NEXT: srl a1, a1, a2
+; RV32I-NEXT: slli a6, a3, 1
+; RV32I-NEXT: srl a3, a3, a2
+; RV32I-NEXT: slli a7, a4, 1
+; RV32I-NEXT: srl a4, a4, a2
+; RV32I-NEXT: srl a2, a5, a2
+; RV32I-NEXT: slli a5, a5, 1
+; RV32I-NEXT: sll a6, a6, t0
+; RV32I-NEXT: sll a7, a7, t0
+; RV32I-NEXT: sll a5, a5, t0
+; RV32I-NEXT: or a1, a1, a6
+; RV32I-NEXT: or a3, a3, a7
+; RV32I-NEXT: or a4, a4, a5
+; RV32I-NEXT: sw a1, 0(a0)
+; RV32I-NEXT: sw a3, 4(a0)
+; RV32I-NEXT: sw a4, 8(a0)
+; RV32I-NEXT: sw a2, 12(a0)
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: lshr128_shamt32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi a4, a2, -64
+; RV64I-NEXT: srl a3, a1, a2
+; RV64I-NEXT: bltz a4, .LBB14_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: j .LBB14_3
+; RV64I-NEXT: .LBB14_2:
+; RV64I-NEXT: srl a0, a0, a2
+; RV64I-NEXT: not a2, a2
+; RV64I-NEXT: slli a1, a1, 1
+; RV64I-NEXT: sll a1, a1, a2
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: .LBB14_3:
+; RV64I-NEXT: srai a1, a4, 63
+; RV64I-NEXT: and a1, a1, a3
+; RV64I-NEXT: ret
+ %zext = zext nneg i32 %b to i128
+ %1 = lshr i128 %a, %zext
+ ret i128 %1
+}
+
+define i128 @ashr128_shamt32(i128 %a, i32 signext %b) nounwind {
+; RV32I-LABEL: ashr128_shamt32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: lw a3, 0(a1)
+; RV32I-NEXT: lw a4, 4(a1)
+; RV32I-NEXT: lw a5, 8(a1)
+; RV32I-NEXT: lw a1, 12(a1)
+; RV32I-NEXT: srli a6, a2, 3
+; RV32I-NEXT: mv a7, sp
+; RV32I-NEXT: andi t0, a2, 31
+; RV32I-NEXT: andi a6, a6, 12
+; RV32I-NEXT: xori t0, t0, 31
+; RV32I-NEXT: add a6, a7, a6
+; RV32I-NEXT: sw a3, 0(sp)
+; RV32I-NEXT: sw a4, 4(sp)
+; RV32I-NEXT: sw a5, 8(sp)
+; RV32I-NEXT: sw a1, 12(sp)
+; RV32I-NEXT: srai a1, a1, 31
+; RV32I-NEXT: sw a1, 16(sp)
+; RV32I-NEXT: sw a1, 20(sp)
+; RV32I-NEXT: sw a1, 24(sp)
+; RV32I-NEXT: sw a1, 28(sp)
+; RV32I-NEXT: lw a1, 0(a6)
+; RV32I-NEXT: lw a3, 4(a6)
+; RV32I-NEXT: lw a4, 8(a6)
+; RV32I-NEXT: lw a5, 12(a6)
+; RV32I-NEXT: srl a1, a1, a2
+; RV32I-NEXT: slli a6, a3, 1
+; RV32I-NEXT: srl a3, a3, a2
+; RV32I-NEXT: slli a7, a4, 1
+; RV32I-NEXT: srl a4, a4, a2
+; RV32I-NEXT: sra a2, a5, a2
+; RV32I-NEXT: slli a5, a5, 1
+; RV32I-NEXT: sll a6, a6, t0
+; RV32I-NEXT: sll a7, a7, t0
+; RV32I-NEXT: sll a5, a5, t0
+; RV32I-NEXT: or a1, a1, a6
+; RV32I-NEXT: or a3, a3, a7
+; RV32I-NEXT: or a4, a4, a5
+; RV32I-NEXT: sw a1, 0(a0)
+; RV32I-NEXT: sw a3, 4(a0)
+; RV32I-NEXT: sw a4, 8(a0)
+; RV32I-NEXT: sw a2, 12(a0)
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: ashr128_shamt32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: mv a3, a1
+; RV64I-NEXT: addi a4, a2, -64
+; RV64I-NEXT: sra a1, a1, a2
+; RV64I-NEXT: bltz a4, .LBB15_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: srai a3, a3, 63
+; RV64I-NEXT: mv a0, a1
+; RV64I-NEXT: mv a1, a3
+; RV64I-NEXT: ret
+; RV64I-NEXT: .LBB15_2:
+; RV64I-NEXT: srl a0, a0, a2
+; RV64I-NEXT: not a2, a2
+; RV64I-NEXT: slli a3, a3, 1
+; RV64I-NEXT: sll a2, a3, a2
+; RV64I-NEXT: or a0, a0, a2
+; RV64I-NEXT: ret
+ %zext = zext nneg i32 %b to i128
+ %1 = ashr i128 %a, %zext
+ ret i128 %1
+}
+
+define i128 @shl128_shamt32(i128 %a, i32 signext %b) nounwind {
+; RV32I-LABEL: shl128_shamt32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: lw a3, 0(a1)
+; RV32I-NEXT: lw a4, 4(a1)
+; RV32I-NEXT: lw a5, 8(a1)
+; RV32I-NEXT: lw a1, 12(a1)
+; RV32I-NEXT: sw zero, 0(sp)
+; RV32I-NEXT: sw zero, 4(sp)
+; RV32I-NEXT: sw zero, 8(sp)
+; RV32I-NEXT: sw zero, 12(sp)
+; RV32I-NEXT: srli a6, a2, 3
+; RV32I-NEXT: addi a7, sp, 16
+; RV32I-NEXT: andi t0, a2, 31
+; RV32I-NEXT: andi a6, a6, 12
+; RV32I-NEXT: sub a6, a7, a6
+; RV32I-NEXT: sw a3, 16(sp)
+; RV32I-NEXT: sw a4, 20(sp)
+; RV32I-NEXT: sw a5, 24(sp)
+; RV32I-NEXT: sw a1, 28(sp)
+; RV32I-NEXT: lw a1, 0(a6)
+; RV32I-NEXT: lw a3, 4(a6)
+; RV32I-NEXT: lw a4, 8(a6)
+; RV32I-NEXT: lw a5, 12(a6)
+; RV32I-NEXT: xori a6, t0, 31
+; RV32I-NEXT: sll a7, a3, a2
+; RV32I-NEXT: srli t0, a1, 1
+; RV32I-NEXT: sll a5, a5, a2
+; RV32I-NEXT: sll a1, a1, a2
+; RV32I-NEXT: sll a2, a4, a2
+; RV32I-NEXT: srli a3, a3, 1
+; RV32I-NEXT: srli a4, a4, 1
+; RV32I-NEXT: srl t0, t0, a6
+; RV32I-NEXT: srl a3, a3, a6
+; RV32I-NEXT: srl a4, a4, a6
+; RV32I-NEXT: or a6, a7, t0
+; RV32I-NEXT: or a2, a2, a3
+; RV32I-NEXT: or a4, a5, a4
+; RV32I-NEXT: sw a1, 0(a0)
+; RV32I-NEXT: sw a6, 4(a0)
+; RV32I-NEXT: sw a2, 8(a0)
+; RV32I-NEXT: sw a4, 12(a0)
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: shl128_shamt32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi a4, a2, -64
+; RV64I-NEXT: sll a3, a0, a2
+; RV64I-NEXT: bltz a4, .LBB16_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: mv a1, a3
+; RV64I-NEXT: j .LBB16_3
+; RV64I-NEXT: .LBB16_2:
+; RV64I-NEXT: sll a1, a1, a2
+; RV64I-NEXT: not a2, a2
+; RV64I-NEXT: srli a0, a0, 1
+; RV64I-NEXT: srl a0, a0, a2
+; RV64I-NEXT: or a1, a1, a0
+; RV64I-NEXT: .LBB16_3:
+; RV64I-NEXT: srai a0, a4, 63
+; RV64I-NEXT: and a0, a0, a3
+; RV64I-NEXT: ret
+ %zext = zext nneg i32 %b to i128
+ %1 = shl i128 %a, %zext
+ ret i128 %1
+}