[llvm] [X86] Truncate i64 sub to i32 when upper 33 bits are zeros (PR #145850)
Omkar Mohanty via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 30 04:23:22 PDT 2025
================
@@ -0,0 +1,78 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64
+
+; Truncate to 32 bit subtraction since first 48 bits are zeros
+define i64 @test1(i16 %a, i16 %b) nounwind {
+; X86-LABEL: test1:
+; X86: # %bb.0:
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: sbbl %edx, %edx
----------------
omkar-mohanty wrote:
Interestingly, PR #144066 produces no redundant `adc` instructions, whereas this PR still emits redundant `sbb` instructions. Digging further, I found that `combineADC` handles the case where both operands are zero, in this code:
```c++
static SDValue combineADC(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
SDValue CarryIn = N->getOperand(2);
auto *LHSC = dyn_cast<ConstantSDNode>(LHS);
auto *RHSC = dyn_cast<ConstantSDNode>(RHS);
// Canonicalize constant to RHS.
if (LHSC && !RHSC)
return DAG.getNode(X86ISD::ADC, SDLoc(N), N->getVTList(), RHS, LHS,
CarryIn);
// If the LHS and RHS of the ADC node are zero, then it can't overflow and
// the result is either zero or one (depending on the input carry bit).
// Strength reduce this down to a "set on carry" aka SETCC_CARRY&1.
if (LHSC && RHSC && LHSC->isZero() && RHSC->isZero() &&
// We don't have a good way to replace an EFLAGS use, so only do this when
// dead right now.
SDValue(N, 1).use_empty()) {
SDLoc DL(N);
EVT VT = N->getValueType(0);
SDValue CarryOut = DAG.getConstant(0, DL, N->getValueType(1));
SDValue Res1 = DAG.getNode(
ISD::AND, DL, VT,
DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
DAG.getTargetConstant(X86::COND_B, DL, MVT::i8), CarryIn),
DAG.getConstant(1, DL, VT));
return DCI.CombineTo(N, Res1, CarryOut);
}
```
But `combineSBB` has no equivalent fold for the case where both operands are zero:
```c++
static SDValue combineSBB(SDNode *N, SelectionDAG &DAG) {
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
SDValue BorrowIn = N->getOperand(2);
if (SDValue Flags = combineCarryThroughADD(BorrowIn, DAG)) {
MVT VT = N->getSimpleValueType(0);
SDVTList VTs = DAG.getVTList(VT, MVT::i32);
return DAG.getNode(X86ISD::SBB, SDLoc(N), VTs, LHS, RHS, Flags);
}
// Fold SBB(SUB(X,Y),0,Carry) -> SBB(X,Y,Carry)
// iff the flag result is dead.
if (LHS.getOpcode() == ISD::SUB && isNullConstant(RHS) &&
!N->hasAnyUseOfValue(1))
return DAG.getNode(X86ISD::SBB, SDLoc(N), N->getVTList(), LHS.getOperand(0),
LHS.getOperand(1), BorrowIn);
return SDValue();
}
```
So should we add the code to handle redundant `sbb` instructions in this PR, or track it as a separate issue?
https://github.com/llvm/llvm-project/pull/145850
More information about the llvm-commits
mailing list