[llvm] [X86] Truncate i64 sub to i32 when upper 33 bits are zeros (PR #145850)
Omkar Mohanty via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 26 00:58:15 PDT 2025
https://github.com/omkar-mohanty updated https://github.com/llvm/llvm-project/pull/145850
>From 99fbe61321d66714a7687fc355a407e706789b7c Mon Sep 17 00:00:00 2001
From: omkar-mohanty <franzohouser at gmail.com>
Date: Mon, 23 Jun 2025 18:17:50 +0530
Subject: [PATCH] [X86] Truncate i64 sub to i32 when upper 33 bits are zeros
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 20 +++++++
llvm/test/CodeGen/X86/reduce-i64-sub.ll | 78 +++++++++++++++++++++++++
2 files changed, 98 insertions(+)
create mode 100644 llvm/test/CodeGen/X86/reduce-i64-sub.ll
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 7c26dd6e2dc2f..e10ebc129ecb3 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -58191,8 +58191,28 @@ static SDValue combineSub(SDNode *N, SelectionDAG &DAG,
EVT VT = N->getValueType(0);
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
+ unsigned int Opcode = N->getOpcode();
SDLoc DL(N);
+ // Use a 32-bit sub+zext if upper 33 bits known zero.
+ if (VT == MVT::i64 && Subtarget.is64Bit()) {
+ APInt HiMask = APInt::getHighBitsSet(64, 33);
+ if (DAG.MaskedValueIsZero(Op0, HiMask) &&
+ DAG.MaskedValueIsZero(Op1, HiMask)) {
+ SDValue LHS = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op0);
+ SDValue RHS = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);
+ bool NUW = Op0->getFlags().hasNoUnsignedWrap();
+ NUW = NUW & DAG.willNotOverflowAdd(false, LHS, RHS);
+ SDNodeFlags Flags;
+ Flags.setNoUnsignedWrap(NUW);
+ // Always true since in the worst case 0 - 2147483647 = -2147483647, still
+ // fits in i32
+ Flags.setNoSignedWrap(true);
+ SDValue Sub = DAG.getNode(Opcode, DL, MVT::i32, LHS, RHS, Flags);
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Sub);
+ }
+ }
+
auto IsNonOpaqueConstant = [&](SDValue Op) {
return DAG.isConstantIntBuildVectorOrConstantInt(Op,
/*AllowOpaques*/ false);
diff --git a/llvm/test/CodeGen/X86/reduce-i64-sub.ll b/llvm/test/CodeGen/X86/reduce-i64-sub.ll
new file mode 100644
index 0000000000000..793df235bd9e4
--- /dev/null
+++ b/llvm/test/CodeGen/X86/reduce-i64-sub.ll
@@ -0,0 +1,78 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64
+
+; Truncate to a 32-bit subtraction since the upper 48 bits of both operands are zero
+define i64 @test1(i16 %a, i16 %b) nounwind {
+; X86-LABEL: test1:
+; X86: # %bb.0:
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: sbbl %edx, %edx
+; X86-NEXT: retl
+;
+; X64-LABEL: test1:
+; X64: # %bb.0:
+; X64-NEXT: movzwl %si, %ecx
+; X64-NEXT: movzwl %di, %eax
+; X64-NEXT: subl %ecx, %eax
+; X64-NEXT: retq
+ %zext_a = zext i16 %a to i64
+ %zext_b = zext i16 %b to i64
+ %sub = sub i64 %zext_a, %zext_b
+ ret i64 %sub
+}
+
+; Do not truncate to a 32-bit subtraction if bit 32 (value 2^32) of an operand is set
+define i64 @test2(i16 %a, i16 %b) nounwind {
+; X86-LABEL: test2:
+; X86: # %bb.0:
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: movl $1, %edx
+; X86-NEXT: sbbl $0, %edx
+; X86-NEXT: retl
+;
+; X64-LABEL: test2:
+; X64: # %bb.0:
+; X64-NEXT: movzwl %di, %ecx
+; X64-NEXT: movzwl %si, %edx
+; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000
+; X64-NEXT: orq %rcx, %rax
+; X64-NEXT: subq %rdx, %rax
+; X64-NEXT: retq
+ %zext_a = zext i16 %a to i64
+ %zext_b = zext i16 %b to i64
+ %or_a = or i64 %zext_a, 4294967296
+ %sub = sub i64 %or_a, %zext_b
+ ret i64 %sub
+}
+
+; Do not truncate to a 32-bit subtraction when an operand is sign-extended (upper bits not known zero)
+define i64 @test3(i16 %a, i16 %b) nounwind {
+; X86-LABEL: test3:
+; X86: # %bb.0:
+; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: sarl $31, %edx
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: sbbl $0, %edx
+; X86-NEXT: retl
+;
+; X64-LABEL: test3:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NEXT: movswq %di, %rax
+; X64-NEXT: movzwl %si, %ecx
+; X64-NEXT: subq %rcx, %rax
+; X64-NEXT: retq
+ %sext_a = sext i16 %a to i64
+ %zext_b = zext i16 %b to i64
+ %sub = sub i64 %sext_a, %zext_b
+ ret i64 %sub
+}
+
More information about the llvm-commits
mailing list