[llvm] 0d29279 - [DAGCombine] Propagate nuw when evaluating sub with narrower types (#156710)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 3 19:17:50 PDT 2025
Author: Yingwei Zheng
Date: 2025-09-04T10:17:45+08:00
New Revision: 0d292794650353352a33fcc856d4b52328d3da97
URL: https://github.com/llvm/llvm-project/commit/0d292794650353352a33fcc856d4b52328d3da97
DIFF: https://github.com/llvm/llvm-project/commit/0d292794650353352a33fcc856d4b52328d3da97.diff
LOG: [DAGCombine] Propagate nuw when evaluating sub with narrower types (#156710)
Proof: https://alive2.llvm.org/ce/z/cdbzSL
Closes https://github.com/llvm/llvm-project/issues/156559.
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/test/CodeGen/X86/shift-i128.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index bed3c42473e27..6310f7270ceaf 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -16317,7 +16317,15 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
- return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
+ SDNodeFlags Flags;
+ // Propagate nuw for sub.
+ if (N0->getOpcode() == ISD::SUB && N0->getFlags().hasNoUnsignedWrap() &&
+ DAG.MaskedValueIsZero(
+ N0->getOperand(0),
+ APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
+ VT.getScalarSizeInBits())))
+ Flags.setNoUnsignedWrap(true);
+ return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR, Flags);
}
}
break;
diff --git a/llvm/test/CodeGen/X86/shift-i128.ll b/llvm/test/CodeGen/X86/shift-i128.ll
index 9323cd5b1917f..a82656e4b7147 100644
--- a/llvm/test/CodeGen/X86/shift-i128.ll
+++ b/llvm/test/CodeGen/X86/shift-i128.ll
@@ -938,3 +938,207 @@ define i128 @lshr_shl_mask(i128 %a0) {
%2 = lshr i128 %1, 1
ret i128 %2
}
+
+define i128 @shift_i128_limited_shamt(i128 noundef %a, i32 noundef %b) nounwind {
+; i686-LABEL: shift_i128_limited_shamt:
+; i686: # %bb.0: # %start
+; i686-NEXT: pushl %ebp
+; i686-NEXT: movl %esp, %ebp
+; i686-NEXT: pushl %ebx
+; i686-NEXT: pushl %edi
+; i686-NEXT: pushl %esi
+; i686-NEXT: andl $-16, %esp
+; i686-NEXT: subl $16, %esp
+; i686-NEXT: movl 28(%ebp), %esi
+; i686-NEXT: movl 32(%ebp), %eax
+; i686-NEXT: movb $6, %dl
+; i686-NEXT: subb 40(%ebp), %dl
+; i686-NEXT: movl %edx, %ecx
+; i686-NEXT: shll %cl, %eax
+; i686-NEXT: movl %esi, %ebx
+; i686-NEXT: movl %esi, %edi
+; i686-NEXT: shrl %ebx
+; i686-NEXT: notb %cl
+; i686-NEXT: shrl %cl, %ebx
+; i686-NEXT: orl %eax, %ebx
+; i686-NEXT: movl 24(%ebp), %esi
+; i686-NEXT: movl %esi, %eax
+; i686-NEXT: movl %edx, %ecx
+; i686-NEXT: shll %cl, %eax
+; i686-NEXT: shldl %cl, %esi, %edi
+; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT: movl 8(%ebp), %edi
+; i686-NEXT: movl 36(%ebp), %esi
+; i686-NEXT: movl 32(%ebp), %edx
+; i686-NEXT: shldl %cl, %edx, %esi
+; i686-NEXT: movl %esi, 12(%edi)
+; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; i686-NEXT: movl %ecx, 4(%edi)
+; i686-NEXT: movl %eax, (%edi)
+; i686-NEXT: movl %ebx, 8(%edi)
+; i686-NEXT: movl %edi, %eax
+; i686-NEXT: leal -12(%ebp), %esp
+; i686-NEXT: popl %esi
+; i686-NEXT: popl %edi
+; i686-NEXT: popl %ebx
+; i686-NEXT: popl %ebp
+; i686-NEXT: retl $4
+;
+; x86_64-LABEL: shift_i128_limited_shamt:
+; x86_64: # %bb.0: # %start
+; x86_64-NEXT: movq %rdi, %rax
+; x86_64-NEXT: movb $6, %cl
+; x86_64-NEXT: subb %dl, %cl
+; x86_64-NEXT: shldq %cl, %rdi, %rsi
+; x86_64-NEXT: shlq %cl, %rax
+; x86_64-NEXT: movq %rsi, %rdx
+; x86_64-NEXT: retq
+start:
+ %shamt = sub nuw nsw i32 6, %b
+ %ext = zext nneg i32 %shamt to i128
+ %res = shl i128 %a, %ext
+ ret i128 %res
+}
+
+define i128 @shift_i128_limited_shamt_no_nuw(i128 noundef %a, i32 noundef %b) nounwind {
+; i686-LABEL: shift_i128_limited_shamt_no_nuw:
+; i686: # %bb.0: # %start
+; i686-NEXT: pushl %ebp
+; i686-NEXT: movl %esp, %ebp
+; i686-NEXT: pushl %ebx
+; i686-NEXT: pushl %edi
+; i686-NEXT: pushl %esi
+; i686-NEXT: andl $-16, %esp
+; i686-NEXT: subl $48, %esp
+; i686-NEXT: movzbl 40(%ebp), %eax
+; i686-NEXT: movl 24(%ebp), %ecx
+; i686-NEXT: movl 28(%ebp), %edx
+; i686-NEXT: movl 32(%ebp), %esi
+; i686-NEXT: movl 36(%ebp), %edi
+; i686-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; i686-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; i686-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; i686-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; i686-NEXT: movb $6, %cl
+; i686-NEXT: subb %al, %cl
+; i686-NEXT: movl %ecx, %eax
+; i686-NEXT: shrb $3, %al
+; i686-NEXT: andb $12, %al
+; i686-NEXT: negb %al
+; i686-NEXT: movsbl %al, %eax
+; i686-NEXT: movl $0, {{[0-9]+}}(%esp)
+; i686-NEXT: movl $0, {{[0-9]+}}(%esp)
+; i686-NEXT: movl $0, {{[0-9]+}}(%esp)
+; i686-NEXT: movl $0, (%esp)
+; i686-NEXT: movl 20(%esp,%eax), %edx
+; i686-NEXT: movl 24(%esp,%eax), %ebx
+; i686-NEXT: movl %ebx, %edi
+; i686-NEXT: shldl %cl, %edx, %edi
+; i686-NEXT: movl 16(%esp,%eax), %esi
+; i686-NEXT: movl 28(%esp,%eax), %eax
+; i686-NEXT: shldl %cl, %ebx, %eax
+; i686-NEXT: movl 8(%ebp), %ebx
+; i686-NEXT: movl %eax, 12(%ebx)
+; i686-NEXT: movl %edi, 8(%ebx)
+; i686-NEXT: movl %esi, %eax
+; i686-NEXT: shll %cl, %eax
+; i686-NEXT: shldl %cl, %esi, %edx
+; i686-NEXT: movl %edx, 4(%ebx)
+; i686-NEXT: movl %eax, (%ebx)
+; i686-NEXT: movl %ebx, %eax
+; i686-NEXT: leal -12(%ebp), %esp
+; i686-NEXT: popl %esi
+; i686-NEXT: popl %edi
+; i686-NEXT: popl %ebx
+; i686-NEXT: popl %ebp
+; i686-NEXT: retl $4
+;
+; x86_64-LABEL: shift_i128_limited_shamt_no_nuw:
+; x86_64: # %bb.0: # %start
+; x86_64-NEXT: movb $6, %cl
+; x86_64-NEXT: subb %dl, %cl
+; x86_64-NEXT: shldq %cl, %rdi, %rsi
+; x86_64-NEXT: shlq %cl, %rdi
+; x86_64-NEXT: xorl %eax, %eax
+; x86_64-NEXT: testb $64, %cl
+; x86_64-NEXT: cmovneq %rdi, %rsi
+; x86_64-NEXT: cmoveq %rdi, %rax
+; x86_64-NEXT: movq %rsi, %rdx
+; x86_64-NEXT: retq
+start:
+ %shamt = sub nsw i32 6, %b
+ %ext = zext nneg i32 %shamt to i128
+ %res = shl i128 %a, %ext
+ ret i128 %res
+}
+
+define i128 @shift_i128_limited_shamt_unknown_lhs(i128 noundef %a, i32 noundef %b, i32 noundef %c) nounwind {
+; i686-LABEL: shift_i128_limited_shamt_unknown_lhs:
+; i686: # %bb.0: # %start
+; i686-NEXT: pushl %ebp
+; i686-NEXT: movl %esp, %ebp
+; i686-NEXT: pushl %ebx
+; i686-NEXT: pushl %edi
+; i686-NEXT: pushl %esi
+; i686-NEXT: andl $-16, %esp
+; i686-NEXT: subl $48, %esp
+; i686-NEXT: movl 24(%ebp), %eax
+; i686-NEXT: movl 28(%ebp), %edx
+; i686-NEXT: movl 32(%ebp), %esi
+; i686-NEXT: movl 36(%ebp), %edi
+; i686-NEXT: movl 44(%ebp), %ecx
+; i686-NEXT: subl 40(%ebp), %ecx
+; i686-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; i686-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; i686-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; i686-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; i686-NEXT: movl $0, {{[0-9]+}}(%esp)
+; i686-NEXT: movl $0, {{[0-9]+}}(%esp)
+; i686-NEXT: movl $0, {{[0-9]+}}(%esp)
+; i686-NEXT: movl $0, (%esp)
+; i686-NEXT: movl %ecx, %eax
+; i686-NEXT: shrb $3, %al
+; i686-NEXT: andb $12, %al
+; i686-NEXT: negb %al
+; i686-NEXT: movsbl %al, %eax
+; i686-NEXT: movl 20(%esp,%eax), %edx
+; i686-NEXT: movl 24(%esp,%eax), %ebx
+; i686-NEXT: movl %ebx, %edi
+; i686-NEXT: shldl %cl, %edx, %edi
+; i686-NEXT: movl 16(%esp,%eax), %esi
+; i686-NEXT: movl 28(%esp,%eax), %eax
+; i686-NEXT: shldl %cl, %ebx, %eax
+; i686-NEXT: movl 8(%ebp), %ebx
+; i686-NEXT: movl %eax, 12(%ebx)
+; i686-NEXT: movl %edi, 8(%ebx)
+; i686-NEXT: movl %esi, %eax
+; i686-NEXT: shll %cl, %eax
+; i686-NEXT: # kill: def $cl killed $cl killed $ecx
+; i686-NEXT: shldl %cl, %esi, %edx
+; i686-NEXT: movl %edx, 4(%ebx)
+; i686-NEXT: movl %eax, (%ebx)
+; i686-NEXT: movl %ebx, %eax
+; i686-NEXT: leal -12(%ebp), %esp
+; i686-NEXT: popl %esi
+; i686-NEXT: popl %edi
+; i686-NEXT: popl %ebx
+; i686-NEXT: popl %ebp
+; i686-NEXT: retl $4
+;
+; x86_64-LABEL: shift_i128_limited_shamt_unknown_lhs:
+; x86_64: # %bb.0: # %start
+; x86_64-NEXT: subl %edx, %ecx
+; x86_64-NEXT: shldq %cl, %rdi, %rsi
+; x86_64-NEXT: shlq %cl, %rdi
+; x86_64-NEXT: xorl %eax, %eax
+; x86_64-NEXT: testb $64, %cl
+; x86_64-NEXT: cmovneq %rdi, %rsi
+; x86_64-NEXT: cmoveq %rdi, %rax
+; x86_64-NEXT: movq %rsi, %rdx
+; x86_64-NEXT: retq
+start:
+ %shamt = sub nuw nsw i32 %c, %b
+ %ext = zext nneg i32 %shamt to i128
+ %res = shl i128 %a, %ext
+ ret i128 %res
+}
More information about the llvm-commits mailing list