[llvm] [X86] Support icmp_i64(i64 X, i64 Y) --> icmp_i32(trunc(X), trunc(Y)) for non-constant Y (PR #91085)

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Sat May 4 14:24:44 PDT 2024


https://github.com/RKSimon created https://github.com/llvm/llvm-project/pull/91085

Relax the constraint on the fold to allow any values for X and Y, provided the upper 32 bits of both are known to be zero.

Minor code-size saving by moving to 32-bit (i32) instructions.
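
As a minimal illustration (a hand-written IR sketch, not one of the tests touched by this patch), both compare operands below have their upper 32 bits known zero, so EmitCmp can now shrink the i64 compare to a 32-bit cmpl instead of keeping a 64-bit cmpq:

define i1 @cmp_both_masked(i64 %x, i64 %y) {
  %lo = and i64 %x, 4294967295   ; upper 32 bits known zero
  %hi = lshr i64 %y, 32          ; upper 32 bits known zero
  %c = icmp eq i64 %lo, %hi      ; equality (unsigned) predicate, so the fold applies
  ret i1 %c
}

Previously the fold only fired when the RHS was a constant with at most 32 active bits; the updated CHECK lines in cmp-shiftX-maskX.ll and vector-shift-by-select-loop.ll below show the resulting cmpq -> cmpl changes.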

From b5f1250b094dc9985acb5327f2e52c11f51a378e Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Sat, 4 May 2024 22:23:28 +0100
Subject: [PATCH] [X86] Support icmp_i64(i64 X, i64 Y) --> icmp_i32(trunc(X),
 trunc(Y)) for non-constant Y

Relax the constraint on the fold to allow any values for X and Y, provided the upper 32 bits of both are known to be zero.

Minor code-size saving by moving to 32-bit (i32) instructions.
---
 llvm/lib/Target/X86/X86ISelLowering.cpp              |  6 +++---
 llvm/test/CodeGen/X86/cmp-shiftX-maskX.ll            | 10 +++++-----
 llvm/test/CodeGen/X86/vector-shift-by-select-loop.ll |  8 ++++----
 3 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index cf4a64ffded2e8..2ffb4c596463cc 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -22708,10 +22708,10 @@ static SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
   }
 
   // Try to shrink i64 compares if the input has enough zero bits.
-  // FIXME: Do this for non-constant compares for constant on LHS?
-  if (CmpVT == MVT::i64 && isa<ConstantSDNode>(Op1) && !isX86CCSigned(X86CC) &&
+  // TODO: Add sign-bits equivalent for isX86CCSigned(X86CC)?
+  if (CmpVT == MVT::i64 && !isX86CCSigned(X86CC) &&
       Op0.hasOneUse() && // Hacky way to not break CSE opportunities with sub.
-      Op1->getAsAPIntVal().getActiveBits() <= 32 &&
+      DAG.MaskedValueIsZero(Op1, APInt::getHighBitsSet(64, 32)) &&
       DAG.MaskedValueIsZero(Op0, APInt::getHighBitsSet(64, 32))) {
     CmpVT = MVT::i32;
     Op0 = DAG.getNode(ISD::TRUNCATE, dl, CmpVT, Op0);
diff --git a/llvm/test/CodeGen/X86/cmp-shiftX-maskX.ll b/llvm/test/CodeGen/X86/cmp-shiftX-maskX.ll
index bc546fe857a3ed..67070b989786db 100644
--- a/llvm/test/CodeGen/X86/cmp-shiftX-maskX.ll
+++ b/llvm/test/CodeGen/X86/cmp-shiftX-maskX.ll
@@ -174,7 +174,7 @@ define i1 @shl_to_shr_eq_i64_s44(i64 %x) {
 ; CHECK-NEXT:    movq %rdi, %rax
 ; CHECK-NEXT:    shrq $44, %rax
 ; CHECK-NEXT:    andl $1048575, %edi # imm = 0xFFFFF
-; CHECK-NEXT:    cmpq %rax, %rdi
+; CHECK-NEXT:    cmpl %eax, %edi
 ; CHECK-NEXT:    sete %al
 ; CHECK-NEXT:    retq
   %shl = shl i64 %x, 44
@@ -186,9 +186,9 @@ define i1 @shl_to_shr_eq_i64_s44(i64 %x) {
 define i1 @shr_to_shl_ne_i64_s32(i64 %x) {
 ; CHECK-NOBMI-LABEL: shr_to_shl_ne_i64_s32:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    movl %edi, %eax
-; CHECK-NOBMI-NEXT:    shrq $32, %rdi
-; CHECK-NOBMI-NEXT:    cmpq %rdi, %rax
+; CHECK-NOBMI-NEXT:    movq %rdi, %rax
+; CHECK-NOBMI-NEXT:    shrq $32, %rax
+; CHECK-NOBMI-NEXT:    cmpl %eax, %edi
 ; CHECK-NOBMI-NEXT:    setne %al
 ; CHECK-NOBMI-NEXT:    retq
 ;
@@ -244,7 +244,7 @@ define i1 @shl_to_shr_eq_i64_s63(i64 %x) {
 ; CHECK-NEXT:    movq %rdi, %rax
 ; CHECK-NEXT:    shrq $63, %rax
 ; CHECK-NEXT:    andl $1, %edi
-; CHECK-NEXT:    cmpq %rax, %rdi
+; CHECK-NEXT:    cmpl %eax, %edi
 ; CHECK-NEXT:    sete %al
 ; CHECK-NEXT:    retq
   %shl = shl i64 %x, 63
diff --git a/llvm/test/CodeGen/X86/vector-shift-by-select-loop.ll b/llvm/test/CodeGen/X86/vector-shift-by-select-loop.ll
index fd29d09d91960a..e33c99be0ed093 100644
--- a/llvm/test/CodeGen/X86/vector-shift-by-select-loop.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-by-select-loop.ll
@@ -115,7 +115,7 @@ define void @vector_variable_shift_left_loop(ptr nocapture %arr, ptr nocapture r
 ; SSE-NEXT:    cmpq %rcx, %rdx
 ; SSE-NEXT:    jne .LBB0_4
 ; SSE-NEXT:  # %bb.5: # %middle.block
-; SSE-NEXT:    cmpq %r9, %rdx
+; SSE-NEXT:    cmpl %r9d, %edx
 ; SSE-NEXT:    jne .LBB0_6
 ; SSE-NEXT:  .LBB0_9: # %for.cond.cleanup
 ; SSE-NEXT:    retq
@@ -239,7 +239,7 @@ define void @vector_variable_shift_left_loop(ptr nocapture %arr, ptr nocapture r
 ; AVX1-NEXT:    cmpq %rcx, %rdx
 ; AVX1-NEXT:    jne .LBB0_4
 ; AVX1-NEXT:  # %bb.5: # %middle.block
-; AVX1-NEXT:    cmpq %r9, %rdx
+; AVX1-NEXT:    cmpl %r9d, %edx
 ; AVX1-NEXT:    jne .LBB0_6
 ; AVX1-NEXT:  .LBB0_9: # %for.cond.cleanup
 ; AVX1-NEXT:    vzeroupper
@@ -314,7 +314,7 @@ define void @vector_variable_shift_left_loop(ptr nocapture %arr, ptr nocapture r
 ; AVX2-NEXT:    cmpq %rcx, %rdx
 ; AVX2-NEXT:    jne .LBB0_4
 ; AVX2-NEXT:  # %bb.5: # %middle.block
-; AVX2-NEXT:    cmpq %r9, %rdx
+; AVX2-NEXT:    cmpl %r9d, %edx
 ; AVX2-NEXT:    jne .LBB0_6
 ; AVX2-NEXT:  .LBB0_9: # %for.cond.cleanup
 ; AVX2-NEXT:    vzeroupper
@@ -413,7 +413,7 @@ define void @vector_variable_shift_left_loop(ptr nocapture %arr, ptr nocapture r
 ; XOP-NEXT:    cmpq %rcx, %rdx
 ; XOP-NEXT:    jne .LBB0_4
 ; XOP-NEXT:  # %bb.5: # %middle.block
-; XOP-NEXT:    cmpq %r9, %rdx
+; XOP-NEXT:    cmpl %r9d, %edx
 ; XOP-NEXT:    jne .LBB0_6
 ; XOP-NEXT:  .LBB0_9: # %for.cond.cleanup
 ; XOP-NEXT:    vzeroupper


