[llvm] [X86] getIntImmCostInst - recognise i64 ICMP EQ/NE special cases (PR #142812)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 4 09:48:32 PDT 2025
https://github.com/RKSimon created https://github.com/llvm/llvm-project/pull/142812
If the lower 32 bits of an i64 value are known to be zero, then x86 icmp lowering will shift+truncate down to an i32, allowing the immediate to be embedded.
There's a lot more that could be done here to match x86 icmp lowering, but this PR just focuses on known regressions.
Fixes #142513
Fixes #62145
>From e5ab2d35736be4c0e4ee73c74f4ac5a71b69249b Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Wed, 4 Jun 2025 17:46:55 +0100
Subject: [PATCH] [X86] getIntImmCostInst - recognise i64 ICMP EQ/NE special
cases
If the lower 32 bits of an i64 value are known to be zero, then icmp lowering will shift+truncate down to an i32, allowing the immediate to be embedded
There's a lot more that could be done here to match x86 icmp lowering, but this PR just focuses on known regressions.
Fixes #142513
Fixes #62145
---
.../lib/Target/X86/X86TargetTransformInfo.cpp | 11 +++++++++--
llvm/test/CodeGen/X86/pr142513.ll | 3 +--
llvm/test/CodeGen/X86/pr62145.ll | 19 +++++++------------
3 files changed, 17 insertions(+), 16 deletions(-)
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 9864adc4dcc95..33c9edd24646b 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -5993,12 +5993,19 @@ InstructionCost X86TTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
// This is an imperfect hack to prevent constant hoisting of
// compares that might be trying to check if a 64-bit value fits in
// 32-bits. The backend can optimize these cases using a right shift by 32.
- // Ideally we would check the compare predicate here. There also other
- // similar immediates the backend can use shifts for.
+ // There are other predicates and immediates the backend can use shifts for.
if (Idx == 1 && ImmBitWidth == 64) {
uint64_t ImmVal = Imm.getZExtValue();
if (ImmVal == 0x100000000ULL || ImmVal == 0xffffffff)
return TTI::TCC_Free;
+
+ if (auto *Cmp = dyn_cast_or_null<CmpInst>(Inst)) {
+ if (Cmp->isEquality()) {
+ KnownBits Known = computeKnownBits(Cmp->getOperand(0), DL);
+ if (Known.countMinTrailingZeros() >= 32)
+ return TTI::TCC_Free;
+ }
+ }
}
ImmIdx = 1;
break;
diff --git a/llvm/test/CodeGen/X86/pr142513.ll b/llvm/test/CodeGen/X86/pr142513.ll
index 8503c9bada2e8..fe969104fcf5e 100644
--- a/llvm/test/CodeGen/X86/pr142513.ll
+++ b/llvm/test/CodeGen/X86/pr142513.ll
@@ -21,8 +21,7 @@ define i64 @foo(i64 %x) {
; X64-NEXT: cmpl $65509, %edi # imm = 0xFFE5
; X64-NEXT: je .LBB0_1
; X64-NEXT: # %bb.2: # %if.end
-; X64-NEXT: movabsq $9219572124669181952, %rax # imm = 0x7FF2800000000000
-; X64-NEXT: addq $3, %rax
+; X64-NEXT: movabsq $9219572124669181955, %rax # imm = 0x7FF2800000000003
; X64-NEXT: retq
; X64-NEXT: .LBB0_1: # %if.then
entry:
diff --git a/llvm/test/CodeGen/X86/pr62145.ll b/llvm/test/CodeGen/X86/pr62145.ll
index 509708fb417c4..38208422be6b4 100644
--- a/llvm/test/CodeGen/X86/pr62145.ll
+++ b/llvm/test/CodeGen/X86/pr62145.ll
@@ -5,41 +5,36 @@
define void @f(i64 %a, i64 %b) nounwind {
; X86-LABEL: f:
; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl $-65536, %ebx # imm = 0xFFFF0000
-; X86-NEXT: movl $-589824, %edi # imm = 0xFFF70000
+; X86-NEXT: movl $-65536, %edi # imm = 0xFFFF0000
; X86-NEXT: cmpl $65527, %eax # imm = 0xFFF7
; X86-NEXT: jne .LBB0_2
; X86-NEXT: # %bb.1: # %if.then
; X86-NEXT: calll ext1 at PLT
; X86-NEXT: .LBB0_2: # %if.end
; X86-NEXT: calll ext2 at PLT
-; X86-NEXT: andl %ebx, %esi
-; X86-NEXT: xorl %edi, %esi
+; X86-NEXT: andl %edi, %esi
+; X86-NEXT: cmpl $-589824, %esi # imm = 0xFFF70000
; X86-NEXT: jne .LBB0_3
; X86-NEXT: # %bb.4: # %if.then2
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebx
; X86-NEXT: jmp ext1 at PLT # TAILCALL
; X86-NEXT: .LBB0_3: # %if.end3
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebx
; X86-NEXT: retl
;
; X64-LABEL: f:
; X64: # %bb.0: # %entry
-; X64-NEXT: pushq %r15
; X64-NEXT: pushq %r14
; X64-NEXT: pushq %rbx
+; X64-NEXT: pushq %rax
; X64-NEXT: movq %rsi, %rbx
; X64-NEXT: movabsq $-281474976710656, %r14 # imm = 0xFFFF000000000000
-; X64-NEXT: movabsq $-2533274790395904, %r15 # imm = 0xFFF7000000000000
; X64-NEXT: shrq $48, %rdi
; X64-NEXT: cmpl $65527, %edi # imm = 0xFFF7
; X64-NEXT: jne .LBB0_2
@@ -48,17 +43,17 @@ define void @f(i64 %a, i64 %b) nounwind {
; X64-NEXT: .LBB0_2: # %if.end
; X64-NEXT: callq ext2 at PLT
; X64-NEXT: andq %r14, %rbx
-; X64-NEXT: cmpq %r15, %rbx
+; X64-NEXT: movabsq $-2533274790395904, %rax # imm = 0xFFF7000000000000
+; X64-NEXT: addq $8, %rsp
+; X64-NEXT: cmpq %rax, %rbx
; X64-NEXT: jne .LBB0_3
; X64-NEXT: # %bb.4: # %if.then2
; X64-NEXT: popq %rbx
; X64-NEXT: popq %r14
-; X64-NEXT: popq %r15
; X64-NEXT: jmp ext1 at PLT # TAILCALL
; X64-NEXT: .LBB0_3: # %if.end3
; X64-NEXT: popq %rbx
; X64-NEXT: popq %r14
-; X64-NEXT: popq %r15
; X64-NEXT: retq
entry:
%shr.mask.i = and i64 %a, -281474976710656
More information about the llvm-commits
mailing list