[llvm] [DAGCombiner] Add basic support for `trunc nsw/nuw` (PR #113808)
Yingwei Zheng via llvm-commits
llvm-commits at lists.llvm.org
Sun Oct 27 23:30:06 PDT 2024
https://github.com/dtcxzyw updated https://github.com/llvm/llvm-project/pull/113808
>From 48992b8ccd9621f9ec2f7b8bf97088e86dc3957d Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Sun, 27 Oct 2024 20:42:15 +0800
Subject: [PATCH 1/5] [DAGCombiner] Add pre-commit tests. NFC.
---
llvm/test/CodeGen/AArch64/trunc-nsw-nuw.ll | 63 ++++++++++++++++
llvm/test/CodeGen/RISCV/trunc-nsw-nuw.ll | 83 ++++++++++++++++++++++
llvm/test/CodeGen/X86/trunc-nsw-nuw.ll | 62 ++++++++++++++++
3 files changed, 208 insertions(+)
create mode 100644 llvm/test/CodeGen/AArch64/trunc-nsw-nuw.ll
create mode 100644 llvm/test/CodeGen/RISCV/trunc-nsw-nuw.ll
create mode 100644 llvm/test/CodeGen/X86/trunc-nsw-nuw.ll
diff --git a/llvm/test/CodeGen/AArch64/trunc-nsw-nuw.ll b/llvm/test/CodeGen/AArch64/trunc-nsw-nuw.ll
new file mode 100644
index 00000000000000..7a6b64ac6b0a24
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/trunc-nsw-nuw.ll
@@ -0,0 +1,63 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s
+
+define zeroext i32 @trunc_nuw_nsw_urem(i64 %x) nounwind {
+; CHECK-LABEL: trunc_nuw_nsw_urem:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #5977 // =0x1759
+; CHECK-NEXT: mov w9, w0
+; CHECK-NEXT: movk w8, #53687, lsl #16
+; CHECK-NEXT: umull x8, w9, w8
+; CHECK-NEXT: mov w9, #10000 // =0x2710
+; CHECK-NEXT: lsr x8, x8, #45
+; CHECK-NEXT: msub w0, w8, w9, w0
+; CHECK-NEXT: ret
+entry:
+ %trunc = trunc nuw nsw i64 %x to i32
+ %rem = urem i32 %trunc, 10000
+ ret i32 %rem
+}
+
+define i64 @zext_nneg_udiv_trunc_nuw(i64 %x) nounwind {
+; CHECK-LABEL: zext_nneg_udiv_trunc_nuw:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #52429 // =0xcccd
+; CHECK-NEXT: and w9, w0, #0xffff
+; CHECK-NEXT: mul w8, w9, w8
+; CHECK-NEXT: lsr w0, w8, #23
+; CHECK-NEXT: ret
+entry:
+ %trunc = trunc nuw i64 %x to i16
+ %div = udiv i16 %trunc, 160
+ %ext = zext nneg i16 %div to i64
+ ret i64 %ext
+}
+
+define i64 @sext_udiv_trunc_nuw(i64 %x) nounwind {
+; CHECK-LABEL: sext_udiv_trunc_nuw:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #52429 // =0xcccd
+; CHECK-NEXT: and w9, w0, #0xffff
+; CHECK-NEXT: mul w8, w9, w8
+; CHECK-NEXT: lsr w0, w8, #23
+; CHECK-NEXT: ret
+entry:
+ %trunc = trunc nuw i64 %x to i16
+ %div = udiv i16 %trunc, 160
+ %ext = sext i16 %div to i64
+ ret i64 %ext
+}
+
+define ptr @gep_nusw_zext_nneg_add_trunc_nuw_nsw(ptr %p, i64 %x) nounwind {
+; CHECK-LABEL: gep_nusw_zext_nneg_add_trunc_nuw_nsw:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: add w8, w1, #5
+; CHECK-NEXT: add x0, x0, w8, uxtw #2
+; CHECK-NEXT: ret
+entry:
+ %trunc = trunc nuw nsw i64 %x to i32
+ %add = add nuw nsw i32 %trunc, 5
+ %offset = zext nneg i32 %add to i64
+ %gep = getelementptr nusw float, ptr %p, i64 %offset
+ ret ptr %gep
+}
diff --git a/llvm/test/CodeGen/RISCV/trunc-nsw-nuw.ll b/llvm/test/CodeGen/RISCV/trunc-nsw-nuw.ll
new file mode 100644
index 00000000000000..1f1dbc770467e8
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/trunc-nsw-nuw.ll
@@ -0,0 +1,83 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=riscv64 -mattr=+m | FileCheck %s
+
+define signext i8 @trunc_nsw_add(i32 signext %x) nounwind {
+; CHECK-LABEL: trunc_nsw_add:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi a0, a0, 1
+; CHECK-NEXT: slli a0, a0, 56
+; CHECK-NEXT: srai a0, a0, 56
+; CHECK-NEXT: ret
+entry:
+ %add = add nsw i32 %x, 1
+ %trunc = trunc nsw i32 %add to i8
+ ret i8 %trunc
+}
+
+define signext i32 @trunc_nuw_nsw_urem(i64 %x) nounwind {
+; CHECK-LABEL: trunc_nuw_nsw_urem:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: slli a1, a0, 32
+; CHECK-NEXT: lui a2, 858993
+; CHECK-NEXT: addi a2, a2, 1881
+; CHECK-NEXT: slli a2, a2, 32
+; CHECK-NEXT: mulhu a1, a1, a2
+; CHECK-NEXT: srli a1, a1, 45
+; CHECK-NEXT: lui a2, 2
+; CHECK-NEXT: addi a2, a2, 1808
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: subw a0, a0, a1
+; CHECK-NEXT: ret
+entry:
+ %trunc = trunc nuw nsw i64 %x to i32
+ %rem = urem i32 %trunc, 10000
+ ret i32 %rem
+}
+
+define i64 @zext_nneg_udiv_trunc_nuw(i64 %x) nounwind {
+; CHECK-LABEL: zext_nneg_udiv_trunc_nuw:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lui a1, 52429
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: slli a0, a0, 48
+; CHECK-NEXT: mulhu a0, a0, a1
+; CHECK-NEXT: srli a0, a0, 23
+; CHECK-NEXT: ret
+entry:
+ %trunc = trunc nuw i64 %x to i16
+ %div = udiv i16 %trunc, 160
+ %ext = zext nneg i16 %div to i64
+ ret i64 %ext
+}
+
+define i64 @sext_udiv_trunc_nuw(i64 %x) nounwind {
+; CHECK-LABEL: sext_udiv_trunc_nuw:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lui a1, 52429
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: slli a0, a0, 48
+; CHECK-NEXT: mulhu a0, a0, a1
+; CHECK-NEXT: srli a0, a0, 23
+; CHECK-NEXT: ret
+entry:
+ %trunc = trunc nuw i64 %x to i16
+ %div = udiv i16 %trunc, 160
+ %ext = sext i16 %div to i64
+ ret i64 %ext
+}
+
+define ptr @gep_nusw_zext_nneg_add_trunc_nuw_nsw(ptr %p, i64 %x) nounwind {
+; CHECK-LABEL: gep_nusw_zext_nneg_add_trunc_nuw_nsw:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sext.w a1, a1
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: addi a0, a0, 20
+; CHECK-NEXT: ret
+entry:
+ %trunc = trunc nuw nsw i64 %x to i32
+ %add = add nuw nsw i32 %trunc, 5
+ %offset = zext nneg i32 %add to i64
+ %gep = getelementptr nusw float, ptr %p, i64 %offset
+ ret ptr %gep
+}
diff --git a/llvm/test/CodeGen/X86/trunc-nsw-nuw.ll b/llvm/test/CodeGen/X86/trunc-nsw-nuw.ll
new file mode 100644
index 00000000000000..c06c9e9aec6a09
--- /dev/null
+++ b/llvm/test/CodeGen/X86/trunc-nsw-nuw.ll
@@ -0,0 +1,62 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=x86_64 | FileCheck %s
+
+define zeroext i32 @trunc_nuw_nsw_urem(i64 %x) nounwind {
+; CHECK-LABEL: trunc_nuw_nsw_urem:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: movl %eax, %ecx
+; CHECK-NEXT: movl $3518437209, %edx # imm = 0xD1B71759
+; CHECK-NEXT: imulq %rcx, %rdx
+; CHECK-NEXT: shrq $45, %rdx
+; CHECK-NEXT: imull $10000, %edx, %ecx # imm = 0x2710
+; CHECK-NEXT: subl %ecx, %eax
+; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
+; CHECK-NEXT: retq
+entry:
+ %trunc = trunc nuw nsw i64 %x to i32
+ %rem = urem i32 %trunc, 10000
+ ret i32 %rem
+}
+
+define i64 @zext_nneg_udiv_trunc_nuw(i64 %x) nounwind {
+; CHECK-LABEL: zext_nneg_udiv_trunc_nuw:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movzwl %di, %eax
+; CHECK-NEXT: imull $52429, %eax, %eax # imm = 0xCCCD
+; CHECK-NEXT: shrl $23, %eax
+; CHECK-NEXT: retq
+entry:
+ %trunc = trunc nuw i64 %x to i16
+ %div = udiv i16 %trunc, 160
+ %ext = zext nneg i16 %div to i64
+ ret i64 %ext
+}
+
+define i64 @sext_udiv_trunc_nuw(i64 %x) nounwind {
+; CHECK-LABEL: sext_udiv_trunc_nuw:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movzwl %di, %eax
+; CHECK-NEXT: imull $52429, %eax, %eax # imm = 0xCCCD
+; CHECK-NEXT: shrl $23, %eax
+; CHECK-NEXT: retq
+entry:
+ %trunc = trunc nuw i64 %x to i16
+ %div = udiv i16 %trunc, 160
+ %ext = sext i16 %div to i64
+ ret i64 %ext
+}
+
+define ptr @gep_nusw_zext_nneg_add_trunc_nuw_nsw(ptr %p, i64 %x) nounwind {
+; CHECK-LABEL: gep_nusw_zext_nneg_add_trunc_nuw_nsw:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movl %esi, %eax
+; CHECK-NEXT: leaq 20(%rdi,%rax,4), %rax
+; CHECK-NEXT: retq
+entry:
+ %trunc = trunc nuw nsw i64 %x to i32
+ %add = add nuw nsw i32 %trunc, 5
+ %offset = zext nneg i32 %add to i64
+ %gep = getelementptr nusw float, ptr %p, i64 %offset
+ ret ptr %gep
+}
>From c85d59af9dd2d44ecbef7d2c1afa3bbc74508b71 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Sun, 27 Oct 2024 21:42:09 +0800
Subject: [PATCH 2/5] [DAGCombiner] Add basic support for `trunc nsw/nuw`
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 54 +++++++++----------
.../SelectionDAG/SelectionDAGBuilder.cpp | 8 ++-
llvm/test/CodeGen/AArch64/trunc-nsw-nuw.ll | 11 ++--
llvm/test/CodeGen/RISCV/trunc-nsw-nuw.ll | 33 +++++-------
llvm/test/CodeGen/X86/trunc-nsw-nuw.ll | 18 +++----
5 files changed, 59 insertions(+), 65 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index ad2d2ede302af8..0b249f2f7267bd 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2329,6 +2329,8 @@ static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
if (N->getOpcode() == ISD::TRUNCATE) {
Op = N->getOperand(0);
Known = DAG.computeKnownBits(Op);
+ if (N->getFlags().hasNoUnsignedWrap())
+ Known.Zero.setBitsFrom(N.getScalarValueSizeInBits());
return true;
}
@@ -13793,23 +13795,22 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
unsigned OpBits = Op.getScalarValueSizeInBits();
unsigned MidBits = N0.getScalarValueSizeInBits();
unsigned DestBits = VT.getScalarSizeInBits();
- unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
- if (OpBits == DestBits) {
- // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
- // bits, it is already ready.
- if (NumSignBits > DestBits-MidBits)
+ if (N0->getFlags().hasNoSignedWrap() ||
+ DAG.ComputeNumSignBits(Op) > OpBits - MidBits) {
+ if (OpBits == DestBits) {
+ // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
+ // bits, it is already ready.
return Op;
- } else if (OpBits < DestBits) {
- // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
- // bits, just sext from i32.
- if (NumSignBits > OpBits-MidBits)
+ } else if (OpBits < DestBits) {
+ // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
+ // bits, just sext from i32.
return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
- } else {
- // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
- // bits, just truncate to i32.
- if (NumSignBits > OpBits-MidBits)
+ } else {
+ // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
+ // bits, just truncate to i32.
return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
+ }
}
// fold (sext (truncate x)) -> (sextinreg x).
@@ -14083,24 +14084,23 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
unsigned OpBits = SrcVT.getScalarSizeInBits();
unsigned MidBits = MinVT.getScalarSizeInBits();
unsigned DestBits = VT.getScalarSizeInBits();
- unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
- if (OpBits == DestBits) {
- // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
- // bits, it is already ready.
- if (NumSignBits > DestBits - MidBits)
+ if (N0->getFlags().hasNoSignedWrap() ||
+ DAG.ComputeNumSignBits(Op) > OpBits - MidBits) {
+ if (OpBits == DestBits) {
+ // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
+ // bits, it is already ready.
return Op;
- } else if (OpBits < DestBits) {
- // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
- // bits, just sext from i32.
- // FIXME: This can probably be ZERO_EXTEND nneg?
- if (NumSignBits > OpBits - MidBits)
+ } else if (OpBits < DestBits) {
+ // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
+ // bits, just sext from i32.
+ // FIXME: This can probably be ZERO_EXTEND nneg?
return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
- } else {
- // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
- // bits, just truncate to i32.
- if (NumSignBits > OpBits - MidBits)
+ } else {
+ // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
+ // bits, just truncate to i32.
return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
+ }
}
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 8450553743074c..e1e4db79627ef6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3823,7 +3823,13 @@ void SelectionDAGBuilder::visitTrunc(const User &I) {
SDValue N = getValue(I.getOperand(0));
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType());
- setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), DestVT, N));
+ SDNodeFlags Flags;
+ if (auto *Trunc = dyn_cast<TruncInst>(&I)) {
+ Flags.setNoSignedWrap(Trunc->hasNoSignedWrap());
+ Flags.setNoUnsignedWrap(Trunc->hasNoUnsignedWrap());
+ }
+
+ setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), DestVT, N, Flags));
}
void SelectionDAGBuilder::visitZExt(const User &I) {
diff --git a/llvm/test/CodeGen/AArch64/trunc-nsw-nuw.ll b/llvm/test/CodeGen/AArch64/trunc-nsw-nuw.ll
index 7a6b64ac6b0a24..6041db74639f32 100644
--- a/llvm/test/CodeGen/AArch64/trunc-nsw-nuw.ll
+++ b/llvm/test/CodeGen/AArch64/trunc-nsw-nuw.ll
@@ -5,10 +5,9 @@ define zeroext i32 @trunc_nuw_nsw_urem(i64 %x) nounwind {
; CHECK-LABEL: trunc_nuw_nsw_urem:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov w8, #5977 // =0x1759
-; CHECK-NEXT: mov w9, w0
-; CHECK-NEXT: movk w8, #53687, lsl #16
-; CHECK-NEXT: umull x8, w9, w8
; CHECK-NEXT: mov w9, #10000 // =0x2710
+; CHECK-NEXT: movk w8, #53687, lsl #16
+; CHECK-NEXT: mul x8, x0, x8
; CHECK-NEXT: lsr x8, x8, #45
; CHECK-NEXT: msub w0, w8, w9, w0
; CHECK-NEXT: ret
@@ -22,8 +21,7 @@ define i64 @zext_nneg_udiv_trunc_nuw(i64 %x) nounwind {
; CHECK-LABEL: zext_nneg_udiv_trunc_nuw:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov w8, #52429 // =0xcccd
-; CHECK-NEXT: and w9, w0, #0xffff
-; CHECK-NEXT: mul w8, w9, w8
+; CHECK-NEXT: mul w8, w0, w8
; CHECK-NEXT: lsr w0, w8, #23
; CHECK-NEXT: ret
entry:
@@ -37,8 +35,7 @@ define i64 @sext_udiv_trunc_nuw(i64 %x) nounwind {
; CHECK-LABEL: sext_udiv_trunc_nuw:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov w8, #52429 // =0xcccd
-; CHECK-NEXT: and w9, w0, #0xffff
-; CHECK-NEXT: mul w8, w9, w8
+; CHECK-NEXT: mul w8, w0, w8
; CHECK-NEXT: lsr w0, w8, #23
; CHECK-NEXT: ret
entry:
diff --git a/llvm/test/CodeGen/RISCV/trunc-nsw-nuw.ll b/llvm/test/CodeGen/RISCV/trunc-nsw-nuw.ll
index 1f1dbc770467e8..f270775adcc155 100644
--- a/llvm/test/CodeGen/RISCV/trunc-nsw-nuw.ll
+++ b/llvm/test/CodeGen/RISCV/trunc-nsw-nuw.ll
@@ -4,9 +4,7 @@
define signext i8 @trunc_nsw_add(i32 signext %x) nounwind {
; CHECK-LABEL: trunc_nsw_add:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi a0, a0, 1
-; CHECK-NEXT: slli a0, a0, 56
-; CHECK-NEXT: srai a0, a0, 56
+; CHECK-NEXT: addiw a0, a0, 1
; CHECK-NEXT: ret
entry:
%add = add nsw i32 %x, 1
@@ -17,11 +15,11 @@ entry:
define signext i32 @trunc_nuw_nsw_urem(i64 %x) nounwind {
; CHECK-LABEL: trunc_nuw_nsw_urem:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: slli a1, a0, 32
-; CHECK-NEXT: lui a2, 858993
-; CHECK-NEXT: addi a2, a2, 1881
-; CHECK-NEXT: slli a2, a2, 32
-; CHECK-NEXT: mulhu a1, a1, a2
+; CHECK-NEXT: lui a1, 210
+; CHECK-NEXT: addiw a1, a1, -1167
+; CHECK-NEXT: slli a1, a1, 12
+; CHECK-NEXT: addi a1, a1, 1881
+; CHECK-NEXT: mul a1, a0, a1
; CHECK-NEXT: srli a1, a1, 45
; CHECK-NEXT: lui a2, 2
; CHECK-NEXT: addi a2, a2, 1808
@@ -37,11 +35,10 @@ entry:
define i64 @zext_nneg_udiv_trunc_nuw(i64 %x) nounwind {
; CHECK-LABEL: zext_nneg_udiv_trunc_nuw:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: lui a1, 52429
-; CHECK-NEXT: slli a1, a1, 4
-; CHECK-NEXT: slli a0, a0, 48
-; CHECK-NEXT: mulhu a0, a0, a1
-; CHECK-NEXT: srli a0, a0, 23
+; CHECK-NEXT: lui a1, 13
+; CHECK-NEXT: addi a1, a1, -819
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: srliw a0, a0, 23
; CHECK-NEXT: ret
entry:
%trunc = trunc nuw i64 %x to i16
@@ -53,11 +50,10 @@ entry:
define i64 @sext_udiv_trunc_nuw(i64 %x) nounwind {
; CHECK-LABEL: sext_udiv_trunc_nuw:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: lui a1, 52429
-; CHECK-NEXT: slli a1, a1, 4
-; CHECK-NEXT: slli a0, a0, 48
-; CHECK-NEXT: mulhu a0, a0, a1
-; CHECK-NEXT: srli a0, a0, 23
+; CHECK-NEXT: lui a1, 13
+; CHECK-NEXT: addi a1, a1, -819
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: srliw a0, a0, 23
; CHECK-NEXT: ret
entry:
%trunc = trunc nuw i64 %x to i16
@@ -69,7 +65,6 @@ entry:
define ptr @gep_nusw_zext_nneg_add_trunc_nuw_nsw(ptr %p, i64 %x) nounwind {
; CHECK-LABEL: gep_nusw_zext_nneg_add_trunc_nuw_nsw:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: sext.w a1, a1
; CHECK-NEXT: slli a1, a1, 2
; CHECK-NEXT: add a0, a1, a0
; CHECK-NEXT: addi a0, a0, 20
diff --git a/llvm/test/CodeGen/X86/trunc-nsw-nuw.ll b/llvm/test/CodeGen/X86/trunc-nsw-nuw.ll
index c06c9e9aec6a09..40b48bec8fffd7 100644
--- a/llvm/test/CodeGen/X86/trunc-nsw-nuw.ll
+++ b/llvm/test/CodeGen/X86/trunc-nsw-nuw.ll
@@ -5,11 +5,10 @@ define zeroext i32 @trunc_nuw_nsw_urem(i64 %x) nounwind {
; CHECK-LABEL: trunc_nuw_nsw_urem:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movq %rdi, %rax
-; CHECK-NEXT: movl %eax, %ecx
-; CHECK-NEXT: movl $3518437209, %edx # imm = 0xD1B71759
-; CHECK-NEXT: imulq %rcx, %rdx
-; CHECK-NEXT: shrq $45, %rdx
-; CHECK-NEXT: imull $10000, %edx, %ecx # imm = 0x2710
+; CHECK-NEXT: movl $3518437209, %ecx # imm = 0xD1B71759
+; CHECK-NEXT: imulq %rdi, %rcx
+; CHECK-NEXT: shrq $45, %rcx
+; CHECK-NEXT: imull $10000, %ecx, %ecx # imm = 0x2710
; CHECK-NEXT: subl %ecx, %eax
; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-NEXT: retq
@@ -22,8 +21,7 @@ entry:
define i64 @zext_nneg_udiv_trunc_nuw(i64 %x) nounwind {
; CHECK-LABEL: zext_nneg_udiv_trunc_nuw:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movzwl %di, %eax
-; CHECK-NEXT: imull $52429, %eax, %eax # imm = 0xCCCD
+; CHECK-NEXT: imull $52429, %edi, %eax # imm = 0xCCCD
; CHECK-NEXT: shrl $23, %eax
; CHECK-NEXT: retq
entry:
@@ -36,8 +34,7 @@ entry:
define i64 @sext_udiv_trunc_nuw(i64 %x) nounwind {
; CHECK-LABEL: sext_udiv_trunc_nuw:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movzwl %di, %eax
-; CHECK-NEXT: imull $52429, %eax, %eax # imm = 0xCCCD
+; CHECK-NEXT: imull $52429, %edi, %eax # imm = 0xCCCD
; CHECK-NEXT: shrl $23, %eax
; CHECK-NEXT: retq
entry:
@@ -50,8 +47,7 @@ entry:
define ptr @gep_nusw_zext_nneg_add_trunc_nuw_nsw(ptr %p, i64 %x) nounwind {
; CHECK-LABEL: gep_nusw_zext_nneg_add_trunc_nuw_nsw:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movl %esi, %eax
-; CHECK-NEXT: leaq 20(%rdi,%rax,4), %rax
+; CHECK-NEXT: leaq 20(%rdi,%rsi,4), %rax
; CHECK-NEXT: retq
entry:
%trunc = trunc nuw nsw i64 %x to i32
>From 80eb4f990c9cc5f73b404e8901d028513c0d31a8 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Sun, 27 Oct 2024 23:18:14 +0800
Subject: [PATCH 3/5] [DAGCombiner] Propagate nsw/nuw flags
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 0b249f2f7267bd..509c89e0731437 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -13809,7 +13809,10 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
} else {
// Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
// bits, just truncate to i32.
- return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
+ SDNodeFlags Flags;
+ Flags.setNoSignedWrap(true);
+ Flags.setNoUnsignedWrap(N0->getFlags().hasNoUnsignedWrap());
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, Op, Flags);
}
}
@@ -14099,7 +14102,10 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
} else {
// Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
// bits, just truncate to i32.
- return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
+ SDNodeFlags Flags;
+ Flags.setNoSignedWrap(true);
+ Flags.setNoUnsignedWrap(true);
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, Op, Flags);
}
}
}
>From 9a8bb48172f156f0779630139e56b096fc01b623 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Mon, 28 Oct 2024 14:22:38 +0800
Subject: [PATCH 4/5] [DAGCombiner] Add miscompilation reproducer. NFC.
---
llvm/test/CodeGen/X86/trunc-nsw-nuw.ll | 25 +++++++++++++++++++++++++
1 file changed, 25 insertions(+)
diff --git a/llvm/test/CodeGen/X86/trunc-nsw-nuw.ll b/llvm/test/CodeGen/X86/trunc-nsw-nuw.ll
index 40b48bec8fffd7..d497623f551942 100644
--- a/llvm/test/CodeGen/X86/trunc-nsw-nuw.ll
+++ b/llvm/test/CodeGen/X86/trunc-nsw-nuw.ll
@@ -56,3 +56,28 @@ entry:
%gep = getelementptr nusw float, ptr %p, i64 %offset
ret ptr %gep
}
+
+; Make sure nsw flag is dropped after we simplify the operand of TRUNCATE.
+
+define i32 @simplify_demanded_bits_drop_flag(i1 zeroext %x, i1 zeroext %y) nounwind {
+; CHECK-LABEL: simplify_demanded_bits_drop_flag:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $esi killed $esi def $rsi
+; CHECK-NEXT: negl %edi
+; CHECK-NEXT: shll $2, %esi
+; CHECK-NEXT: xorl %edi, %esi
+; CHECK-NEXT: imulq $-1634202141, %rsi, %rax # imm = 0x9E980DE3
+; CHECK-NEXT: movq %rax, %rcx
+; CHECK-NEXT: shrq $63, %rcx
+; CHECK-NEXT: sarq $44, %rax
+; CHECK-NEXT: addl %ecx, %eax
+; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
+; CHECK-NEXT: retq
+entry:
+ %sel = select i1 %y, i64 4, i64 0
+ %conv0 = sext i1 %x to i64
+ %xor = xor i64 %sel, %conv0
+ %conv1 = trunc nsw i64 %xor to i32
+ %div = sdiv i32 %conv1, -10765
+ ret i32 %div
+}
>From fced38f1c5a459cf2eef797325060c7854c247d4 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Mon, 28 Oct 2024 14:29:38 +0800
Subject: [PATCH 5/5] [TargetLowering] Drop nuw/nsw flags in `TargetLowering::SimplifyDemandedBits`
---
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 11 ++++++++++-
llvm/test/CodeGen/X86/trunc-nsw-nuw.ll | 4 ++--
2 files changed, 12 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 758b3a5fc526e7..1c49ebaeb347e8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -2572,14 +2572,23 @@ bool TargetLowering::SimplifyDemandedBits(
}
case ISD::TRUNCATE: {
SDValue Src = Op.getOperand(0);
+ SDNodeFlags Flags = Op->getFlags();
// Simplify the input, using demanded bit information, and compute the known
// zero/one bits live out.
unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
APInt TruncMask = DemandedBits.zext(OperandBitWidth);
if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
- Depth + 1))
+ Depth + 1)) {
+ if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
+ // Disable the nsw and nuw flags. We can no longer guarantee that we
+ // won't wrap after simplification.
+ Flags.setNoSignedWrap(false);
+ Flags.setNoUnsignedWrap(false);
+ Op->setFlags(Flags);
+ }
return true;
+ }
Known = Known.trunc(BitWidth);
// Attempt to avoid multi-use ops if we don't need anything from them.
diff --git a/llvm/test/CodeGen/X86/trunc-nsw-nuw.ll b/llvm/test/CodeGen/X86/trunc-nsw-nuw.ll
index d497623f551942..5c5f7045ea0306 100644
--- a/llvm/test/CodeGen/X86/trunc-nsw-nuw.ll
+++ b/llvm/test/CodeGen/X86/trunc-nsw-nuw.ll
@@ -62,11 +62,11 @@ entry:
define i32 @simplify_demanded_bits_drop_flag(i1 zeroext %x, i1 zeroext %y) nounwind {
; CHECK-LABEL: simplify_demanded_bits_drop_flag:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: # kill: def $esi killed $esi def $rsi
; CHECK-NEXT: negl %edi
; CHECK-NEXT: shll $2, %esi
; CHECK-NEXT: xorl %edi, %esi
-; CHECK-NEXT: imulq $-1634202141, %rsi, %rax # imm = 0x9E980DE3
+; CHECK-NEXT: movslq %esi, %rax
+; CHECK-NEXT: imulq $-1634202141, %rax, %rax # imm = 0x9E980DE3
; CHECK-NEXT: movq %rax, %rcx
; CHECK-NEXT: shrq $63, %rcx
; CHECK-NEXT: sarq $44, %rax
More information about the llvm-commits mailing list