[llvm] [X86] Truncate i64 add/sub/mul arithmetic to i32 with known zeros in upper 32 bits (PR #143313)
Omkar Mohanty via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 11 01:00:27 PDT 2025
https://github.com/omkar-mohanty updated https://github.com/llvm/llvm-project/pull/143313
>From 69281a1cc6175abb8c09823927e839c23ffcb93e Mon Sep 17 00:00:00 2001
From: omkar-mohanty <franzohouser at gmail.com>
Date: Mon, 9 Jun 2025 13:13:18 +0530
Subject: [PATCH] [X86] Reduce i64 to i32 when high bits are zeros for
add/sub/mul
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 67 +++++++++++++++++++++++++
llvm/test/CodeGen/X86/reduce-i64-add.ll | 28 +++++++++++
llvm/test/CodeGen/X86/reduce-i64-mul.ll | 29 +++++++++++
llvm/test/CodeGen/X86/reduce-i64-sub.ll | 35 +++++++++++++
4 files changed, 159 insertions(+)
create mode 100644 llvm/test/CodeGen/X86/reduce-i64-add.ll
create mode 100644 llvm/test/CodeGen/X86/reduce-i64-mul.ll
create mode 100644 llvm/test/CodeGen/X86/reduce-i64-sub.ll
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 96714adf78e43..365ca8023a75a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -49803,8 +49803,35 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
EVT VT = N->getValueType(0);
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ unsigned int Opcode = N->getOpcode();
SDLoc DL(N);
+ // If both operands of a 64-bit multiply are known to have their upper 48 bits
+ // zero, each operand is at most u16::MAX, so the product fits in 32 bits:
+ // (u16::MAX * u16::MAX) = 65535 * 65535 = 4294836225 = 0xFFFE0001
+ // which fits within an unsigned 32-bit integer (u32::MAX = 4,294,967,295).
+ // The product may exceed i32::MAX, so perform the multiplication as a 32-bit
+ // `mul` followed by a zero-extension (not a sign-extension) to i64.
+ if (VT == MVT::i64 && Subtarget.is64Bit()) {
+ APInt HiMask = APInt::getHighBitsSet(64, 48);
+ if (DAG.MaskedValueIsZero(Op0, HiMask) &&
+ DAG.MaskedValueIsZero(Op1, HiMask)) {
+ SDValue LHS = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op0);
+ SDValue RHS = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);
+ bool NSW = Op0->getFlags().hasNoSignedWrap();
+ bool NUW = Op0->getFlags().hasNoUnsignedWrap();
+ NSW = NSW & DAG.willNotOverflowMul(true, LHS, RHS);
+ NUW = NUW & DAG.willNotOverflowMul(false, LHS, RHS);
+ SDNodeFlags Flags;
+ Flags.setNoUnsignedWrap(NUW);
+ Flags.setNoSignedWrap(NSW);
+ SDValue Mul = DAG.getNode(Opcode, DL, MVT::i32, LHS, RHS, Flags);
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Mul);
+ }
+ }
+
if (SDValue V = combineMulToPMADDWD(N, DL, DAG, Subtarget))
return V;
@@ -58070,8 +58097,28 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
EVT VT = N->getValueType(0);
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
+ unsigned int Opcode = N->getOpcode();
SDLoc DL(N);
+ // Use 32-bit add+zext if both operands' upper 33 bits are zero (no carry-out).
+ if (VT == MVT::i64 && Subtarget.is64Bit()) {
+ APInt HiMask = APInt::getHighBitsSet(64, 33);
+ if (DAG.MaskedValueIsZero(Op0, HiMask) &&
+ DAG.MaskedValueIsZero(Op1, HiMask)) {
+ SDValue LHS = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op0);
+ SDValue RHS = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);
+ bool NSW = Op0->getFlags().hasNoSignedWrap();
+ bool NUW = Op0->getFlags().hasNoUnsignedWrap();
+ NSW = NSW & DAG.willNotOverflowAdd(true, LHS, RHS);
+ NUW = NUW & DAG.willNotOverflowAdd(false, LHS, RHS);
+ SDNodeFlags Flags;
+ Flags.setNoUnsignedWrap(NUW);
+ Flags.setNoSignedWrap(NSW);
+ SDValue Sum = DAG.getNode(Opcode, DL, MVT::i32, LHS, RHS, Flags);
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Sum);
+ }
+ }
+
if (SDValue Select = pushAddIntoCmovOfConsts(N, DL, DAG, Subtarget))
return Select;
@@ -58297,8 +58344,28 @@ static SDValue combineSub(SDNode *N, SelectionDAG &DAG,
EVT VT = N->getValueType(0);
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
+ unsigned int Opcode = N->getOpcode();
SDLoc DL(N);
+ // 32-bit sub+zext if upper 33 bits known zero. FIXME: wrong when Op0 u< Op1.
+ if (VT == MVT::i64 && Subtarget.is64Bit()) {
+ APInt HiMask = APInt::getHighBitsSet(64, 33);
+ if (DAG.MaskedValueIsZero(Op0, HiMask) &&
+ DAG.MaskedValueIsZero(Op1, HiMask)) {
+ SDValue LHS = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op0);
+ SDValue RHS = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);
+ bool NSW = Op0->getFlags().hasNoSignedWrap();
+ bool NUW = Op0->getFlags().hasNoUnsignedWrap();
+ NSW = NSW & DAG.willNotOverflowSub(true, LHS, RHS);
+ NUW = NUW & DAG.willNotOverflowSub(false, LHS, RHS);
+ SDNodeFlags Flags;
+ Flags.setNoUnsignedWrap(NUW);
+ Flags.setNoSignedWrap(NSW);
+ SDValue Sub = DAG.getNode(Opcode, DL, MVT::i32, LHS, RHS, Flags);
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Sub);
+ }
+ }
+
auto IsNonOpaqueConstant = [&](SDValue Op) {
return DAG.isConstantIntBuildVectorOrConstantInt(Op,
/*AllowOpaques*/ false);
diff --git a/llvm/test/CodeGen/X86/reduce-i64-add.ll b/llvm/test/CodeGen/X86/reduce-i64-add.ll
new file mode 100644
index 0000000000000..97d98df739d80
--- /dev/null
+++ b/llvm/test/CodeGen/X86/reduce-i64-add.ll
@@ -0,0 +1,28 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-linux -verify-machineinstrs | FileCheck %s --check-prefix=X64-LINUX
+; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s --check-prefix=X64-WIN32
+
+define i64 @test1(i16 %a) {
+; X86-LABEL: test1:
+; X86: # %bb.0:
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: addl $42, %eax
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: retl
+;
+; X64-LINUX-LABEL: test1:
+; X64-LINUX: # %bb.0:
+; X64-LINUX-NEXT: movzwl %di, %eax
+; X64-LINUX-NEXT: addl $42, %eax
+; X64-LINUX-NEXT: retq
+;
+; X64-WIN32-LABEL: test1:
+; X64-WIN32: # %bb.0:
+; X64-WIN32-NEXT: movzwl %cx, %eax
+; X64-WIN32-NEXT: addl $42, %eax
+; X64-WIN32-NEXT: retq
+ %zext_a = zext i16 %a to i64
+ %sum = add i64 %zext_a, 42
+ ret i64 %sum
+}
diff --git a/llvm/test/CodeGen/X86/reduce-i64-mul.ll b/llvm/test/CodeGen/X86/reduce-i64-mul.ll
new file mode 100644
index 0000000000000..b592c0ba270bc
--- /dev/null
+++ b/llvm/test/CodeGen/X86/reduce-i64-mul.ll
@@ -0,0 +1,29 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-linux -verify-machineinstrs | FileCheck %s --check-prefix=X64-LINUX
+; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s --check-prefix=X64-WIN32
+
+define i64 @test1(i16 %a) {
+; X86-LABEL: test1:
+; X86: # %bb.0:
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl $42, %ecx
+; X86-NEXT: mull %ecx
+; X86-NEXT: retl
+;
+; X64-LINUX-LABEL: test1:
+; X64-LINUX: # %bb.0:
+; X64-LINUX-NEXT: movzwl %di, %eax
+; X64-LINUX-NEXT: imull $42, %eax, %eax
+; X64-LINUX-NEXT: retq
+;
+; X64-WIN32-LABEL: test1:
+; X64-WIN32: # %bb.0:
+; X64-WIN32-NEXT: movzwl %cx, %eax
+; X64-WIN32-NEXT: imull $42, %eax, %eax
+; X64-WIN32-NEXT: retq
+
+ %zext_a = zext i16 %a to i64
+ %mul = mul i64 %zext_a, 42
+ ret i64 %mul
+}
diff --git a/llvm/test/CodeGen/X86/reduce-i64-sub.ll b/llvm/test/CodeGen/X86/reduce-i64-sub.ll
new file mode 100644
index 0000000000000..9cabe7fc1a1e6
--- /dev/null
+++ b/llvm/test/CodeGen/X86/reduce-i64-sub.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-unknown-unknown -o - %s | FileCheck %s
+; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-linux -verify-machineinstrs | FileCheck %s --check-prefix=X64-LINUX
+; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s --check-prefix=X64-WIN32
+
+define i64 @test1(i16 %a) nounwind {
+; CHECK-LABEL: test1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movzwl %di, %eax
+; CHECK-NEXT: addl $42, %eax
+; CHECK-NEXT: retq
+;
+; X86-LABEL: test1:
+; X86: # %bb.0:
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: addl $42, %eax
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: retl
+;
+; X64-LINUX-LABEL: test1:
+; X64-LINUX: # %bb.0:
+; X64-LINUX-NEXT: movzwl %di, %eax
+; X64-LINUX-NEXT: addl $42, %eax
+; X64-LINUX-NEXT: retq
+;
+; X64-WIN32-LABEL: test1:
+; X64-WIN32: # %bb.0:
+; X64-WIN32-NEXT: movzwl %cx, %eax
+; X64-WIN32-NEXT: addl $42, %eax
+; X64-WIN32-NEXT: retq
+ %zext_a = zext i16 %a to i64
+ %sub = sub i64 %zext_a, -42
+ ret i64 %sub
+}
More information about the llvm-commits
mailing list