[llvm] 78a28b3 - [X86] Support icmp_i64(i64 X, i64 Y) --> icmp_i32(trunc(X), trunc(Y)) for non-constant Y (#91085)

via llvm-commits llvm-commits at lists.llvm.org
Sun May 5 02:15:04 PDT 2024


Author: Simon Pilgrim
Date: 2024-05-05T10:15:00+01:00
New Revision: 78a28b3ef7de2a07c8d01cace58019068a17a0bf

URL: https://github.com/llvm/llvm-project/commit/78a28b3ef7de2a07c8d01cace58019068a17a0bf
DIFF: https://github.com/llvm/llvm-project/commit/78a28b3ef7de2a07c8d01cace58019068a17a0bf.diff

LOG: [X86] Support icmp_i64(i64 X, i64 Y) --> icmp_i32(trunc(X), trunc(Y)) for non-constant Y (#91085)

Relax the constraint on the fold to allow any value for X and Y iff all upper 32-bits are known zero

Minor code-size saving by moving to i32 bit instructions

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/cmp-shiftX-maskX.ll
    llvm/test/CodeGen/X86/vector-shift-by-select-loop.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index cf4a64ffded2e8..2ffb4c596463cc 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -22708,10 +22708,10 @@ static SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
   }
 
   // Try to shrink i64 compares if the input has enough zero bits.
-  // FIXME: Do this for non-constant compares for constant on LHS?
-  if (CmpVT == MVT::i64 && isa<ConstantSDNode>(Op1) && !isX86CCSigned(X86CC) &&
+  // TODO: Add sign-bits equivalent for isX86CCSigned(X86CC)?
+  if (CmpVT == MVT::i64 && !isX86CCSigned(X86CC) &&
       Op0.hasOneUse() && // Hacky way to not break CSE opportunities with sub.
-      Op1->getAsAPIntVal().getActiveBits() <= 32 &&
+      DAG.MaskedValueIsZero(Op1, APInt::getHighBitsSet(64, 32)) &&
       DAG.MaskedValueIsZero(Op0, APInt::getHighBitsSet(64, 32))) {
     CmpVT = MVT::i32;
     Op0 = DAG.getNode(ISD::TRUNCATE, dl, CmpVT, Op0);

diff --git a/llvm/test/CodeGen/X86/cmp-shiftX-maskX.ll b/llvm/test/CodeGen/X86/cmp-shiftX-maskX.ll
index bc546fe857a3ed..67070b989786db 100644
--- a/llvm/test/CodeGen/X86/cmp-shiftX-maskX.ll
+++ b/llvm/test/CodeGen/X86/cmp-shiftX-maskX.ll
@@ -174,7 +174,7 @@ define i1 @shl_to_shr_eq_i64_s44(i64 %x) {
 ; CHECK-NEXT:    movq %rdi, %rax
 ; CHECK-NEXT:    shrq $44, %rax
 ; CHECK-NEXT:    andl $1048575, %edi # imm = 0xFFFFF
-; CHECK-NEXT:    cmpq %rax, %rdi
+; CHECK-NEXT:    cmpl %eax, %edi
 ; CHECK-NEXT:    sete %al
 ; CHECK-NEXT:    retq
   %shl = shl i64 %x, 44
@@ -186,9 +186,9 @@ define i1 @shl_to_shr_eq_i64_s44(i64 %x) {
 define i1 @shr_to_shl_ne_i64_s32(i64 %x) {
 ; CHECK-NOBMI-LABEL: shr_to_shl_ne_i64_s32:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    movl %edi, %eax
-; CHECK-NOBMI-NEXT:    shrq $32, %rdi
-; CHECK-NOBMI-NEXT:    cmpq %rdi, %rax
+; CHECK-NOBMI-NEXT:    movq %rdi, %rax
+; CHECK-NOBMI-NEXT:    shrq $32, %rax
+; CHECK-NOBMI-NEXT:    cmpl %eax, %edi
 ; CHECK-NOBMI-NEXT:    setne %al
 ; CHECK-NOBMI-NEXT:    retq
 ;
@@ -244,7 +244,7 @@ define i1 @shl_to_shr_eq_i64_s63(i64 %x) {
 ; CHECK-NEXT:    movq %rdi, %rax
 ; CHECK-NEXT:    shrq $63, %rax
 ; CHECK-NEXT:    andl $1, %edi
-; CHECK-NEXT:    cmpq %rax, %rdi
+; CHECK-NEXT:    cmpl %eax, %edi
 ; CHECK-NEXT:    sete %al
 ; CHECK-NEXT:    retq
   %shl = shl i64 %x, 63

diff --git a/llvm/test/CodeGen/X86/vector-shift-by-select-loop.ll b/llvm/test/CodeGen/X86/vector-shift-by-select-loop.ll
index fd29d09d91960a..e33c99be0ed093 100644
--- a/llvm/test/CodeGen/X86/vector-shift-by-select-loop.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-by-select-loop.ll
@@ -115,7 +115,7 @@ define void @vector_variable_shift_left_loop(ptr nocapture %arr, ptr nocapture r
 ; SSE-NEXT:    cmpq %rcx, %rdx
 ; SSE-NEXT:    jne .LBB0_4
 ; SSE-NEXT:  # %bb.5: # %middle.block
-; SSE-NEXT:    cmpq %r9, %rdx
+; SSE-NEXT:    cmpl %r9d, %edx
 ; SSE-NEXT:    jne .LBB0_6
 ; SSE-NEXT:  .LBB0_9: # %for.cond.cleanup
 ; SSE-NEXT:    retq
@@ -239,7 +239,7 @@ define void @vector_variable_shift_left_loop(ptr nocapture %arr, ptr nocapture r
 ; AVX1-NEXT:    cmpq %rcx, %rdx
 ; AVX1-NEXT:    jne .LBB0_4
 ; AVX1-NEXT:  # %bb.5: # %middle.block
-; AVX1-NEXT:    cmpq %r9, %rdx
+; AVX1-NEXT:    cmpl %r9d, %edx
 ; AVX1-NEXT:    jne .LBB0_6
 ; AVX1-NEXT:  .LBB0_9: # %for.cond.cleanup
 ; AVX1-NEXT:    vzeroupper
@@ -314,7 +314,7 @@ define void @vector_variable_shift_left_loop(ptr nocapture %arr, ptr nocapture r
 ; AVX2-NEXT:    cmpq %rcx, %rdx
 ; AVX2-NEXT:    jne .LBB0_4
 ; AVX2-NEXT:  # %bb.5: # %middle.block
-; AVX2-NEXT:    cmpq %r9, %rdx
+; AVX2-NEXT:    cmpl %r9d, %edx
 ; AVX2-NEXT:    jne .LBB0_6
 ; AVX2-NEXT:  .LBB0_9: # %for.cond.cleanup
 ; AVX2-NEXT:    vzeroupper
@@ -413,7 +413,7 @@ define void @vector_variable_shift_left_loop(ptr nocapture %arr, ptr nocapture r
 ; XOP-NEXT:    cmpq %rcx, %rdx
 ; XOP-NEXT:    jne .LBB0_4
 ; XOP-NEXT:  # %bb.5: # %middle.block
-; XOP-NEXT:    cmpq %r9, %rdx
+; XOP-NEXT:    cmpl %r9d, %edx
 ; XOP-NEXT:    jne .LBB0_6
 ; XOP-NEXT:  .LBB0_9: # %for.cond.cleanup
 ; XOP-NEXT:    vzeroupper


        


More information about the llvm-commits mailing list