[llvm] db13404 - [x86] combineMul - handle 0/-1 KnownBits cases before MUL_IMM logic (REAPPLIED)

Mon Oct 7 03:26:01 PDT 2024

Author: Simon Pilgrim
Date: 2024-10-07T11:25:42+01:00
New Revision: db1340412e167d828ae406975265529d95da7ef0

URL: https://github.com/llvm/llvm-project/commit/db1340412e167d828ae406975265529d95da7ef0
DIFF: https://github.com/llvm/llvm-project/commit/db1340412e167d828ae406975265529d95da7ef0.diff

LOG: [x86] combineMul - handle 0/-1 KnownBits cases before MUL_IMM logic (REAPPLIED)

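Follow-up to the fix in 3d862c78bbb5ecbdfe93996bdf2dcfc64325ae87: always fold a multiply by a known-constant 0 or -1 to zero or a negation, respectively, by doing so before the MUL_IMM early-outs (MulConstantOptimization disabled, minsize functions, before/during legalization).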

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/mul-constant-i64.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 73f6b51907eb35..d4ab0491e7d6b1 100644

--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -48476,28 +48476,34 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
   if (DCI.isBeforeLegalize() && VT.isVector())
     return reduceVMULWidth(N, DL, DAG, Subtarget);
 
-  // Optimize a single multiply with constant into two operations in order to
-  // implement it with two cheaper instructions, e.g. LEA + SHL, LEA + LEA.
-  if (!MulConstantOptimization)
+  if (VT != MVT::i64 && VT != MVT::i32 &&
+      (!VT.isVector() || !VT.isSimple() || !VT.isInteger()))
     return SDValue();
 
-  // An imul is usually smaller than the alternative sequence.
-  if (DAG.getMachineFunction().getFunction().hasMinSize())
+  KnownBits Known1 = DAG.computeKnownBits(N->getOperand(1));
+  if (!Known1.isConstant())
     return SDValue();
 
-  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
+  const APInt &C = Known1.getConstant();
+  if (C.isZero())
+    return DAG.getConstant(0, DL, VT);
+
+  if (C.isAllOnes())
+    return DAG.getNegative(N->getOperand(0), DL, VT);
+
+  if (isPowerOf2_64(C.getZExtValue()))
     return SDValue();
 
-  if (VT != MVT::i64 && VT != MVT::i32 &&
-      (!VT.isVector() || !VT.isSimple() || !VT.isInteger()))
+  // Optimize a single multiply with constant into two operations in order to
+  // implement it with two cheaper instructions, e.g. LEA + SHL, LEA + LEA.
+  if (!MulConstantOptimization)
     return SDValue();
 
-  KnownBits Known1 = DAG.computeKnownBits(N->getOperand(1));
-  if (!Known1.isConstant())
+  // An imul is usually smaller than the alternative sequence.
+  if (DAG.getMachineFunction().getFunction().hasMinSize())
     return SDValue();
 
-  const APInt &C = Known1.getConstant();
-  if (isPowerOf2_64(C.getZExtValue()) || C.isZero() || C.isAllOnes())
+  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
     return SDValue();
 
   int64_t SignMulAmt = C.getSExtValue();

diff  --git a/llvm/test/CodeGen/X86/mul-constant-i64.ll b/llvm/test/CodeGen/X86/mul-constant-i64.ll
index a2a1c511302ce7..03dd5351c78acd 100644
--- a/llvm/test/CodeGen/X86/mul-constant-i64.ll
+++ b/llvm/test/CodeGen/X86/mul-constant-i64.ll
@@ -1642,59 +1642,13 @@ define i64 @PR111325(i64 %a0, i1 %a1) {
 ; X86-NOOPT-NEXT:    xorl %edx, %edx
 ; X86-NOOPT-NEXT:    retl
 ;
-; X64-HSW-LABEL: PR111325:
-; X64-HSW:       # %bb.0: # %entry
-; X64-HSW-NEXT:    movl $4294967295, %ecx # imm = 0xFFFFFFFF
-; X64-HSW-NEXT:    movl $4294967295, %eax # imm = 0xFFFFFFFF
-; X64-HSW-NEXT:    imull %edi, %eax
-; X64-HSW-NEXT:    testb $1, %sil
-; X64-HSW-NEXT:    cmoveq %rcx, %rax
-; X64-HSW-NEXT:    retq
-;
-; X64-JAG-LABEL: PR111325:
-; X64-JAG:       # %bb.0: # %entry
-; X64-JAG-NEXT:    movl $4294967295, %eax # imm = 0xFFFFFFFF
-; X64-JAG-NEXT:    movl $4294967295, %ecx # imm = 0xFFFFFFFF
-; X64-JAG-NEXT:    imull %edi, %eax
-; X64-JAG-NEXT:    testb $1, %sil
-; X64-JAG-NEXT:    cmoveq %rcx, %rax
-; X64-JAG-NEXT:    retq
-;
-; X64-SLM-LABEL: PR111325:
-; X64-SLM:       # %bb.0: # %entry
-; X64-SLM-NEXT:    movl $4294967295, %eax # imm = 0xFFFFFFFF
-; X64-SLM-NEXT:    movl $4294967295, %ecx # imm = 0xFFFFFFFF
-; X64-SLM-NEXT:    imull %edi, %eax
-; X64-SLM-NEXT:    testb $1, %sil
-; X64-SLM-NEXT:    cmoveq %rcx, %rax
-; X64-SLM-NEXT:    retq
-;
-; X64-HSW-NOOPT-LABEL: PR111325:
-; X64-HSW-NOOPT:       # %bb.0: # %entry
-; X64-HSW-NOOPT-NEXT:    movl $4294967295, %ecx # imm = 0xFFFFFFFF
-; X64-HSW-NOOPT-NEXT:    movl $4294967295, %eax # imm = 0xFFFFFFFF
-; X64-HSW-NOOPT-NEXT:    imull %edi, %eax
-; X64-HSW-NOOPT-NEXT:    testb $1, %sil
-; X64-HSW-NOOPT-NEXT:    cmoveq %rcx, %rax
-; X64-HSW-NOOPT-NEXT:    retq
-;
-; X64-JAG-NOOPT-LABEL: PR111325:
-; X64-JAG-NOOPT:       # %bb.0: # %entry
-; X64-JAG-NOOPT-NEXT:    movl $4294967295, %eax # imm = 0xFFFFFFFF
-; X64-JAG-NOOPT-NEXT:    movl $4294967295, %ecx # imm = 0xFFFFFFFF
-; X64-JAG-NOOPT-NEXT:    imull %edi, %eax
-; X64-JAG-NOOPT-NEXT:    testb $1, %sil
-; X64-JAG-NOOPT-NEXT:    cmoveq %rcx, %rax
-; X64-JAG-NOOPT-NEXT:    retq
-;
-; X64-SLM-NOOPT-LABEL: PR111325:
-; X64-SLM-NOOPT:       # %bb.0: # %entry
-; X64-SLM-NOOPT-NEXT:    movl $4294967295, %eax # imm = 0xFFFFFFFF
-; X64-SLM-NOOPT-NEXT:    movl $4294967295, %ecx # imm = 0xFFFFFFFF
-; X64-SLM-NOOPT-NEXT:    imull %edi, %eax
-; X64-SLM-NOOPT-NEXT:    testb $1, %sil
-; X64-SLM-NOOPT-NEXT:    cmoveq %rcx, %rax
-; X64-SLM-NOOPT-NEXT:    retq
+; X64-LABEL: PR111325:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    negl %edi
+; X64-NEXT:    testb $1, %sil
+; X64-NEXT:    movl $4294967295, %eax # imm = 0xFFFFFFFF
+; X64-NEXT:    cmovneq %rdi, %rax
+; X64-NEXT:    retq
 entry:
   %mul = mul i64 %a0, 4294967295
   %mask = and i64 %mul, 4294967295