[llvm] 0d29279 - [DAGCombine] Propagate nuw when evaluating sub with narrower types (#156710)

via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 3 19:17:50 PDT 2025


Author: Yingwei Zheng
Date: 2025-09-04T10:17:45+08:00
New Revision: 0d292794650353352a33fcc856d4b52328d3da97

URL: https://github.com/llvm/llvm-project/commit/0d292794650353352a33fcc856d4b52328d3da97
DIFF: https://github.com/llvm/llvm-project/commit/0d292794650353352a33fcc856d4b52328d3da97.diff

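LOG: [DAGCombine] Propagate nuw when evaluating sub with narrower types (#156710)

When visitTRUNCATE narrows a `sub`, the `nuw` flag is now kept on the
narrowed node if the original `sub` had `nuw` and the bits of its left-hand
operand that the truncation discards are known to be zero.

Proof: https://alive2.llvm.org/ce/z/cdbzSL
Closes https://github.com/llvm/llvm-project/issues/156559.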

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/test/CodeGen/X86/shift-i128.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index bed3c42473e27..6310f7270ceaf 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -16317,7 +16317,15 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
       if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
         SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
         SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
-        return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
+        SDNodeFlags Flags;
+        // Propagate nuw for sub.
+        if (N0->getOpcode() == ISD::SUB && N0->getFlags().hasNoUnsignedWrap() &&
+            DAG.MaskedValueIsZero(
+                N0->getOperand(0),
+                APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
+                                      VT.getScalarSizeInBits())))
+          Flags.setNoUnsignedWrap(true);
+        return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR, Flags);
       }
     }
     break;

diff  --git a/llvm/test/CodeGen/X86/shift-i128.ll b/llvm/test/CodeGen/X86/shift-i128.ll
index 9323cd5b1917f..a82656e4b7147 100644
--- a/llvm/test/CodeGen/X86/shift-i128.ll
+++ b/llvm/test/CodeGen/X86/shift-i128.ll
@@ -938,3 +938,207 @@ define i128 @lshr_shl_mask(i128 %a0) {
   %2 = lshr i128 %1, 1
   ret i128 %2
 }
+
+define i128 @shift_i128_limited_shamt(i128 noundef %a, i32 noundef %b) nounwind {
+; i686-LABEL: shift_i128_limited_shamt:
+; i686:       # %bb.0: # %start
+; i686-NEXT:    pushl %ebp
+; i686-NEXT:    movl %esp, %ebp
+; i686-NEXT:    pushl %ebx
+; i686-NEXT:    pushl %edi
+; i686-NEXT:    pushl %esi
+; i686-NEXT:    andl $-16, %esp
+; i686-NEXT:    subl $16, %esp
+; i686-NEXT:    movl 28(%ebp), %esi
+; i686-NEXT:    movl 32(%ebp), %eax
+; i686-NEXT:    movb $6, %dl
+; i686-NEXT:    subb 40(%ebp), %dl
+; i686-NEXT:    movl %edx, %ecx
+; i686-NEXT:    shll %cl, %eax
+; i686-NEXT:    movl %esi, %ebx
+; i686-NEXT:    movl %esi, %edi
+; i686-NEXT:    shrl %ebx
+; i686-NEXT:    notb %cl
+; i686-NEXT:    shrl %cl, %ebx
+; i686-NEXT:    orl %eax, %ebx
+; i686-NEXT:    movl 24(%ebp), %esi
+; i686-NEXT:    movl %esi, %eax
+; i686-NEXT:    movl %edx, %ecx
+; i686-NEXT:    shll %cl, %eax
+; i686-NEXT:    shldl %cl, %esi, %edi
+; i686-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:    movl 8(%ebp), %edi
+; i686-NEXT:    movl 36(%ebp), %esi
+; i686-NEXT:    movl 32(%ebp), %edx
+; i686-NEXT:    shldl %cl, %edx, %esi
+; i686-NEXT:    movl %esi, 12(%edi)
+; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; i686-NEXT:    movl %ecx, 4(%edi)
+; i686-NEXT:    movl %eax, (%edi)
+; i686-NEXT:    movl %ebx, 8(%edi)
+; i686-NEXT:    movl %edi, %eax
+; i686-NEXT:    leal -12(%ebp), %esp
+; i686-NEXT:    popl %esi
+; i686-NEXT:    popl %edi
+; i686-NEXT:    popl %ebx
+; i686-NEXT:    popl %ebp
+; i686-NEXT:    retl $4
+;
+; x86_64-LABEL: shift_i128_limited_shamt:
+; x86_64:       # %bb.0: # %start
+; x86_64-NEXT:    movq %rdi, %rax
+; x86_64-NEXT:    movb $6, %cl
+; x86_64-NEXT:    subb %dl, %cl
+; x86_64-NEXT:    shldq %cl, %rdi, %rsi
+; x86_64-NEXT:    shlq %cl, %rax
+; x86_64-NEXT:    movq %rsi, %rdx
+; x86_64-NEXT:    retq
+start:
+  %shamt = sub nuw nsw i32 6, %b
+  %ext = zext nneg i32 %shamt to i128
+  %res = shl i128 %a, %ext
+  ret i128 %res
+}
+
+define i128 @shift_i128_limited_shamt_no_nuw(i128 noundef %a, i32 noundef %b) nounwind {
+; i686-LABEL: shift_i128_limited_shamt_no_nuw:
+; i686:       # %bb.0: # %start
+; i686-NEXT:    pushl %ebp
+; i686-NEXT:    movl %esp, %ebp
+; i686-NEXT:    pushl %ebx
+; i686-NEXT:    pushl %edi
+; i686-NEXT:    pushl %esi
+; i686-NEXT:    andl $-16, %esp
+; i686-NEXT:    subl $48, %esp
+; i686-NEXT:    movzbl 40(%ebp), %eax
+; i686-NEXT:    movl 24(%ebp), %ecx
+; i686-NEXT:    movl 28(%ebp), %edx
+; i686-NEXT:    movl 32(%ebp), %esi
+; i686-NEXT:    movl 36(%ebp), %edi
+; i686-NEXT:    movl %edi, {{[0-9]+}}(%esp)
+; i686-NEXT:    movl %esi, {{[0-9]+}}(%esp)
+; i686-NEXT:    movl %edx, {{[0-9]+}}(%esp)
+; i686-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
+; i686-NEXT:    movb $6, %cl
+; i686-NEXT:    subb %al, %cl
+; i686-NEXT:    movl %ecx, %eax
+; i686-NEXT:    shrb $3, %al
+; i686-NEXT:    andb $12, %al
+; i686-NEXT:    negb %al
+; i686-NEXT:    movsbl %al, %eax
+; i686-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; i686-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; i686-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; i686-NEXT:    movl $0, (%esp)
+; i686-NEXT:    movl 20(%esp,%eax), %edx
+; i686-NEXT:    movl 24(%esp,%eax), %ebx
+; i686-NEXT:    movl %ebx, %edi
+; i686-NEXT:    shldl %cl, %edx, %edi
+; i686-NEXT:    movl 16(%esp,%eax), %esi
+; i686-NEXT:    movl 28(%esp,%eax), %eax
+; i686-NEXT:    shldl %cl, %ebx, %eax
+; i686-NEXT:    movl 8(%ebp), %ebx
+; i686-NEXT:    movl %eax, 12(%ebx)
+; i686-NEXT:    movl %edi, 8(%ebx)
+; i686-NEXT:    movl %esi, %eax
+; i686-NEXT:    shll %cl, %eax
+; i686-NEXT:    shldl %cl, %esi, %edx
+; i686-NEXT:    movl %edx, 4(%ebx)
+; i686-NEXT:    movl %eax, (%ebx)
+; i686-NEXT:    movl %ebx, %eax
+; i686-NEXT:    leal -12(%ebp), %esp
+; i686-NEXT:    popl %esi
+; i686-NEXT:    popl %edi
+; i686-NEXT:    popl %ebx
+; i686-NEXT:    popl %ebp
+; i686-NEXT:    retl $4
+;
+; x86_64-LABEL: shift_i128_limited_shamt_no_nuw:
+; x86_64:       # %bb.0: # %start
+; x86_64-NEXT:    movb $6, %cl
+; x86_64-NEXT:    subb %dl, %cl
+; x86_64-NEXT:    shldq %cl, %rdi, %rsi
+; x86_64-NEXT:    shlq %cl, %rdi
+; x86_64-NEXT:    xorl %eax, %eax
+; x86_64-NEXT:    testb $64, %cl
+; x86_64-NEXT:    cmovneq %rdi, %rsi
+; x86_64-NEXT:    cmoveq %rdi, %rax
+; x86_64-NEXT:    movq %rsi, %rdx
+; x86_64-NEXT:    retq
+start:
+  %shamt = sub nsw i32 6, %b
+  %ext = zext nneg i32 %shamt to i128
+  %res = shl i128 %a, %ext
+  ret i128 %res
+}
+
+define i128 @shift_i128_limited_shamt_unknown_lhs(i128 noundef %a, i32 noundef %b, i32 noundef %c) nounwind {
+; i686-LABEL: shift_i128_limited_shamt_unknown_lhs:
+; i686:       # %bb.0: # %start
+; i686-NEXT:    pushl %ebp
+; i686-NEXT:    movl %esp, %ebp
+; i686-NEXT:    pushl %ebx
+; i686-NEXT:    pushl %edi
+; i686-NEXT:    pushl %esi
+; i686-NEXT:    andl $-16, %esp
+; i686-NEXT:    subl $48, %esp
+; i686-NEXT:    movl 24(%ebp), %eax
+; i686-NEXT:    movl 28(%ebp), %edx
+; i686-NEXT:    movl 32(%ebp), %esi
+; i686-NEXT:    movl 36(%ebp), %edi
+; i686-NEXT:    movl 44(%ebp), %ecx
+; i686-NEXT:    subl 40(%ebp), %ecx
+; i686-NEXT:    movl %edi, {{[0-9]+}}(%esp)
+; i686-NEXT:    movl %esi, {{[0-9]+}}(%esp)
+; i686-NEXT:    movl %edx, {{[0-9]+}}(%esp)
+; i686-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; i686-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; i686-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; i686-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; i686-NEXT:    movl $0, (%esp)
+; i686-NEXT:    movl %ecx, %eax
+; i686-NEXT:    shrb $3, %al
+; i686-NEXT:    andb $12, %al
+; i686-NEXT:    negb %al
+; i686-NEXT:    movsbl %al, %eax
+; i686-NEXT:    movl 20(%esp,%eax), %edx
+; i686-NEXT:    movl 24(%esp,%eax), %ebx
+; i686-NEXT:    movl %ebx, %edi
+; i686-NEXT:    shldl %cl, %edx, %edi
+; i686-NEXT:    movl 16(%esp,%eax), %esi
+; i686-NEXT:    movl 28(%esp,%eax), %eax
+; i686-NEXT:    shldl %cl, %ebx, %eax
+; i686-NEXT:    movl 8(%ebp), %ebx
+; i686-NEXT:    movl %eax, 12(%ebx)
+; i686-NEXT:    movl %edi, 8(%ebx)
+; i686-NEXT:    movl %esi, %eax
+; i686-NEXT:    shll %cl, %eax
+; i686-NEXT:    # kill: def $cl killed $cl killed $ecx
+; i686-NEXT:    shldl %cl, %esi, %edx
+; i686-NEXT:    movl %edx, 4(%ebx)
+; i686-NEXT:    movl %eax, (%ebx)
+; i686-NEXT:    movl %ebx, %eax
+; i686-NEXT:    leal -12(%ebp), %esp
+; i686-NEXT:    popl %esi
+; i686-NEXT:    popl %edi
+; i686-NEXT:    popl %ebx
+; i686-NEXT:    popl %ebp
+; i686-NEXT:    retl $4
+;
+; x86_64-LABEL: shift_i128_limited_shamt_unknown_lhs:
+; x86_64:       # %bb.0: # %start
+; x86_64-NEXT:    subl %edx, %ecx
+; x86_64-NEXT:    shldq %cl, %rdi, %rsi
+; x86_64-NEXT:    shlq %cl, %rdi
+; x86_64-NEXT:    xorl %eax, %eax
+; x86_64-NEXT:    testb $64, %cl
+; x86_64-NEXT:    cmovneq %rdi, %rsi
+; x86_64-NEXT:    cmoveq %rdi, %rax
+; x86_64-NEXT:    movq %rsi, %rdx
+; x86_64-NEXT:    retq
+start:
+  %shamt = sub nuw nsw i32 %c, %b
+  %ext = zext nneg i32 %shamt to i128
+  %res = shl i128 %a, %ext
+  ret i128 %res
+}


        


More information about the llvm-commits mailing list