[llvm] [X86] Truncate i64 add to i32 when upper 33 bits are zeros (PR #144066)

Omkar Mohanty via llvm-commits llvm-commits at lists.llvm.org
Sat Jun 21 04:40:14 PDT 2025


https://github.com/omkar-mohanty updated https://github.com/llvm/llvm-project/pull/144066

>From baf11ed6b40047145ca293988b7d42c3ba7a7b8b Mon Sep 17 00:00:00 2001
From: omkar-mohanty <franzohouser at gmail.com>
Date: Wed, 11 Jun 2025 18:05:39 +0530
Subject: [PATCH] [X86] Truncate i64 add to i32 when upper 33 bits are zeros

---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 19 +++++
 llvm/test/CodeGen/X86/reduce-i64-add.ll | 97 +++++++++++++++++++++++++
 2 files changed, 116 insertions(+)
 create mode 100644 llvm/test/CodeGen/X86/reduce-i64-add.ll

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 33083c0eba695..4c004fafb049a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -57969,8 +57969,27 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
   EVT VT = N->getValueType(0);
   SDValue Op0 = N->getOperand(0);
   SDValue Op1 = N->getOperand(1);
+  unsigned Opcode = N->getOpcode();
   SDLoc DL(N);
 
+  // Use a 32-bit add+zext if the upper 33 bits are known to be zero.
+  if (VT == MVT::i64 && Subtarget.is64Bit()) {
+    APInt HiMask = APInt::getHighBitsSet(64, 33);
+    if (DAG.MaskedValueIsZero(Op0, HiMask) &&
+        DAG.MaskedValueIsZero(Op1, HiMask)) {
+      SDValue LHS = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op0);
+      SDValue RHS = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);
+      // i64 NSW does not carry over to i32; prove it on the narrow type.
+      bool NSW = DAG.willNotOverflowAdd(/*IsSigned=*/true, LHS, RHS);
+      SDNodeFlags Flags;
+      // Upper 33 bits are zero, so the 32-bit add can never wrap unsigned.
+      Flags.setNoUnsignedWrap(true);
+      Flags.setNoSignedWrap(NSW);
+      SDValue Sum = DAG.getNode(Opcode, DL, MVT::i32, LHS, RHS, Flags);
+      return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Sum);
+    }
+  }
+
   if (SDValue Select = pushAddIntoCmovOfConsts(N, DL, DAG, Subtarget))
     return Select;
 
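Why the mask is 33 bits rather than 32: getHighBitsSet(64, 33) covers bits
31..63, so each operand is known to be below 2^31 and the 64-bit sum is below
2^32, meaning the 32-bit add can never carry out of the low 32 bits. With only
the upper 32 bits known zero, 0xFFFFFFFF + 0xFFFFFFFF would overflow the
narrow add and the zext of the result would drop the carry. Conceptually the
combine performs the following IR-level rewrite (a sketch of the intent, not
the literal DAG nodes):

  %zext_a = zext i16 %a to i64
  %sum    = add i64 %zext_a, 42       ; 64-bit addq
    ==>
  %trunc  = trunc i64 %zext_a to i32
  %sum32  = add nuw i32 %trunc, 42    ; 32-bit addl
  %sum    = zext i32 %sum32 to i64    ; free on x86-64: a 32-bit op
                                      ; already zeroes bits 32..63

The final zext is why this is profitable: x86-64 implicitly zero-extends
32-bit results, so the narrow form needs no extra instruction and drops the
REX.W prefix from the add.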
diff --git a/llvm/test/CodeGen/X86/reduce-i64-add.ll b/llvm/test/CodeGen/X86/reduce-i64-add.ll
new file mode 100644
index 0000000000000..6df3707961e8e
--- /dev/null
+++ b/llvm/test/CodeGen/X86/reduce-i64-add.ll
@@ -0,0 +1,97 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64
+
+define i64 @test1(i16 %a) {
+; X86-LABEL: test1:
+; X86:       # %bb.0:
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    addl $42, %eax
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    retl
+;
+; X64-LABEL: test1:
+; X64:       # %bb.0:
+; X64-NEXT:    movzwl %di, %eax
+; X64-NEXT:    addl $42, %eax
+; X64-NEXT:    retq
+  %zext_a = zext i16 %a to i64
+  %sum = add nuw nsw i64 %zext_a, 42
+  ret i64 %sum
+}
+
+define i64 @test2(i16 %a, i16 %b) {
+; X86-LABEL: test2:
+; X86:       # %bb.0:
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    retl
+;
+; X64-LABEL: test2:
+; X64:       # %bb.0:
+; X64-NEXT:    movzwl %si, %ecx
+; X64-NEXT:    movzwl %di, %eax
+; X64-NEXT:    addl %ecx, %eax
+; X64-NEXT:    retq
+  %zext_a = zext i16 %a to i64
+  %zext_b = zext i16 %b to i64
+; The upper 48 bits are zero, so we can safely truncate to a 32-bit addition.
+  %sum = add nuw nsw i64 %zext_a, %zext_b
+  ret i64 %sum
+}
+
+define i64 @test3(i16 %a) {
+; X86-LABEL: test3:
+; X86:       # %bb.0:
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    addl $42, %eax
+; X86-NEXT:    movl $1, %edx
+; X86-NEXT:    retl
+;
+; X64-LABEL: test3:
+; X64:       # %bb.0:
+; X64-NEXT:    movzwl %di, %ecx
+; X64-NEXT:    movabsq $4294967338, %rax # imm = 0x10000002A
+; X64-NEXT:    addq %rcx, %rax
+; X64-NEXT:    retq
+  %zext_a = zext i16 %a to i64
+; Set bit 32 of %a to force a 64-bit addition; we do not truncate to a 32-bit addition in this case.
+  %or_a = or i64 %zext_a, 4294967296
+  %sum = add nuw nsw i64 %or_a, 42
+  ret i64 %sum
+}
+
+define i64 @test4(i16 %a, i16 %b) {
+; X86-LABEL: test4:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    .cfi_offset %esi, -8
+; X86-NEXT:    movswl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, %esi
+; X86-NEXT:    sarl $31, %esi
+; X86-NEXT:    movswl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    sarl $31, %edx
+; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    adcl %esi, %edx
+; X86-NEXT:    popl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 4
+; X86-NEXT:    retl
+;
+; X64-LABEL: test4:
+; X64:       # %bb.0:
+; X64-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-NEXT:    movswq %di, %rcx
+; X64-NEXT:    movswq %si, %rax
+; X64-NEXT:    addq %rcx, %rax
+; X64-NEXT:    retq
+  %sext_a = sext i16 %a to i64
+  %sext_b = sext i16 %b to i64
+; We don't truncate to a 32-bit addition when the operands are sign-extended.
+  %sum = add nuw nsw i64 %sext_a, %sext_b
+  ret i64 %sum
+}
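For anyone who wants to sanity-check the guard outside of SelectionDAG, here
is a minimal standalone C++ sketch. It uses concrete uint64_t values where the
combine reasons about KnownBits, and canNarrowAdd is an illustrative name, not
an LLVM API:

  #include <cstdint>
  #include <cstdio>

  // High 33 bits of a 64-bit value (bits 31..63), mirroring
  // APInt::getHighBitsSet(64, 33) in the combine.
  constexpr uint64_t HiMask = ~0ULL << 31;

  // Both operands below 2^31 => the 64-bit sum is below 2^32, so
  // zext(add i32) equals the original add i64.
  bool canNarrowAdd(uint64_t A, uint64_t B) {
    return (A & HiMask) == 0 && (B & HiMask) == 0;
  }

  int main() {
    printf("%d\n", canNarrowAdd(0xFFFF, 42));          // test1/test2: 1
    printf("%d\n", canNarrowAdd(0x10000002AULL, 42));  // test3: bit 32 set, 0
  }

test4 fails the check for a different reason: sext of an unknown i16 leaves
bits 15..63 unknown, so MaskedValueIsZero cannot prove the high mask is zero.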
