[llvm] [X86] Fold some (truncate (srl (add X, C1), C2)) patterns to (add (truncate (srl X, C2)), C1') (PR #126448)

João Gouveia via llvm-commits llvm-commits at lists.llvm.org
Sun Feb 9 16:30:25 PST 2025


https://github.com/joaotgouveia created https://github.com/llvm/llvm-project/pull/126448

Addresses the poor codegen identified in #123239, as well as a few related cases. The transformation is correct for `eq` (https://alive2.llvm.org/ce/z/qZhwtT), `ne` (https://alive2.llvm.org/ce/z/6gsmNz), `ult` (https://alive2.llvm.org/ce/z/xip_td) and `ugt` (https://alive2.llvm.org/ce/z/39XQkX); a reduced-width brute-force check is sketched below.
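
Independent of the alive2 proofs, the identity can be brute-forced at a reduced width (i64/i32 scaled down to u16/u8 so every input is enumerable). The harness below and its constants are illustrative only, not part of the patch; `ne` agrees whenever `eq` does, so checking `eq`, `ult` and `ugt` suffices:

    #include <cstdint>
    #include <cstdio>

    int main() {
      // Wide type u16 models i64, narrow type u8 models i32.
      const unsigned C2 = 12;               // shift amount, > narrow width (8)
      const uint16_t C1 = 0xE << C2;        // add constant, no bits below C2
      const uint16_t NotC1 = (uint16_t)~C1; // 0x1FFF
      const uint8_t C1p = (uint8_t)~(NotC1 >> C2); // folded constant, 0xFE
      const uint8_t K = 3;                  // cmp constant, K < (C1 >> C2)

      for (uint32_t Xi = 0; Xi <= 0xFFFF; ++Xi) {
        uint16_t X = (uint16_t)Xi;
        uint8_t Before = (uint8_t)((uint16_t)(X + C1) >> C2); // trunc(srl(add))
        uint8_t After = (uint8_t)((uint8_t)(X >> C2) + C1p);  // add(trunc(srl))
        // The two values need not be equal; only the predicates must agree.
        if ((Before == K) != (After == K) || (Before < K) != (After < K) ||
            (Before > K) != (After > K)) {
          printf("mismatch at X=0x%04x\n", (unsigned)X);
          return 1;
        }
      }
      printf("eq/ne/ult/ugt agree for all 65536 inputs\n");
      return 0;
    }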

From 3d735253520f81cc5f6211ad32500033247f04ee Mon Sep 17 00:00:00 2001
From: João Gouveia <jtalonegouveia at gmail.com>
Date: Sun, 9 Feb 2025 23:16:18 +0000
Subject: [PATCH 1/2] [X86] Add test coverage for some i64 cmp+srl+add cases

---
 .../CodeGen/X86/combine-setcc-trunc-add.ll    | 128 ++++++++++++++++++
 1 file changed, 128 insertions(+)
 create mode 100644 llvm/test/CodeGen/X86/combine-setcc-trunc-add.ll

diff --git a/llvm/test/CodeGen/X86/combine-setcc-trunc-add.ll b/llvm/test/CodeGen/X86/combine-setcc-trunc-add.ll
new file mode 100644
index 00000000000000..67383427d96620
--- /dev/null
+++ b/llvm/test/CodeGen/X86/combine-setcc-trunc-add.ll
@@ -0,0 +1,128 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefixes=X64
+
+; Test for https://github.com/llvm/llvm-project/issues/123239
+
+define i1 @test_ult_trunc_add(i64 %x) {
+; X64-LABEL: test_ult_trunc_add:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    movabsq $3940649673949184, %rax # imm = 0xE000000000000
+; X64-NEXT:    addq %rdi, %rax
+; X64-NEXT:    shrq $48, %rax
+; X64-NEXT:    cmpl $3, %eax
+; X64-NEXT:    setb %al
+; X64-NEXT:    retq
+entry:
+  %add = add i64 %x, 3940649673949184
+  %shr = lshr i64 %add, 48
+  %conv = trunc i64 %shr to i32
+  %res = icmp ult i32 %conv, 3
+  ret i1 %res
+}
+
+define i1 @test_ult_add(i64 %x) {
+; X64-LABEL: test_ult_add:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    movabsq $3940649673949184, %rax # imm = 0xE000000000000
+; X64-NEXT:    addq %rdi, %rax
+; X64-NEXT:    shrq $48, %rax
+; X64-NEXT:    cmpl $3, %eax
+; X64-NEXT:    setb %al
+; X64-NEXT:    retq
+entry:
+  %0 = add i64 3940649673949184, %x
+  %1 = icmp ult i64 %0, 844424930131968
+  ret i1 %1
+}
+
+define i1 @test_ugt_trunc_add(i64 %x) {
+; X64-LABEL: test_ugt_trunc_add:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    movabsq $3940649673949184, %rax # imm = 0xE000000000000
+; X64-NEXT:    addq %rdi, %rax
+; X64-NEXT:    shrq $48, %rax
+; X64-NEXT:    cmpl $4, %eax
+; X64-NEXT:    setae %al
+; X64-NEXT:    retq
+entry:
+  %add = add i64 %x, 3940649673949184
+  %shr = lshr i64 %add, 48
+  %conv = trunc i64 %shr to i32
+  %res = icmp ugt i32 %conv, 3
+  ret i1 %res
+}
+
+define i1 @test_ugt_add(i64 %x) {
+; X64-LABEL: test_ugt_add:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    movabsq $3940649673949184, %rax # imm = 0xE000000000000
+; X64-NEXT:    addq %rdi, %rax
+; X64-NEXT:    movabsq $844424930131968, %rcx # imm = 0x3000000000000
+; X64-NEXT:    cmpq %rcx, %rax
+; X64-NEXT:    seta %al
+; X64-NEXT:    retq
+entry:
+  %0 = add i64 3940649673949184, %x
+  %1 = icmp ugt i64 %0, 844424930131968
+  ret i1 %1
+}
+
+define i1 @test_eq_trunc_add(i64 %x) {
+; X64-LABEL: test_eq_trunc_add:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    movabsq $3940649673949184, %rax # imm = 0xE000000000000
+; X64-NEXT:    addq %rdi, %rax
+; X64-NEXT:    shrq $48, %rax
+; X64-NEXT:    cmpl $3, %eax
+; X64-NEXT:    sete %al
+; X64-NEXT:    retq
+entry:
+  %add = add i64 %x, 3940649673949184
+  %shr = lshr i64 %add, 48
+  %conv = trunc i64 %shr to i32
+  %res = icmp eq i32 %conv, 3
+  ret i1 %res
+}
+
+define i1 @test_eq_add(i64 %x) {
+; X64-LABEL: test_eq_add:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    movabsq $-3096224743817216, %rax # imm = 0xFFF5000000000000
+; X64-NEXT:    cmpq %rax, %rdi
+; X64-NEXT:    sete %al
+; X64-NEXT:    retq
+entry:
+  %0 = add i64 3940649673949184, %x
+  %1 = icmp eq i64 %0, 844424930131968
+  ret i1 %1
+}
+
+define i1 @test_ne_trunc_add(i64 %x) {
+; X64-LABEL: test_ne_trunc_add:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    movabsq $3940649673949184, %rax # imm = 0xE000000000000
+; X64-NEXT:    addq %rdi, %rax
+; X64-NEXT:    shrq $48, %rax
+; X64-NEXT:    cmpl $3, %eax
+; X64-NEXT:    setne %al
+; X64-NEXT:    retq
+entry:
+  %add = add i64 %x, 3940649673949184
+  %shr = lshr i64 %add, 48
+  %conv = trunc i64 %shr to i32
+  %res = icmp ne i32 %conv, 3
+  ret i1 %res
+}
+
+define i1 @test_ne_add(i64 %x) {
+; X64-LABEL: test_ne_add:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    movabsq $-3096224743817216, %rax # imm = 0xFFF5000000000000
+; X64-NEXT:    cmpq %rax, %rdi
+; X64-NEXT:    setne %al
+; X64-NEXT:    retq
+entry:
+  %0 = add i64 3940649673949184, %x
+  %1 = icmp ne i64 %0, 844424930131968
+  ret i1 %1
+}
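
A note on the baseline output above: even before the fold, `test_ult_add` compiles to the same shrq+cmpl sequence as `test_ult_trunc_add`, because for a threshold whose low 48 bits are clear, `y u< (3 << 48)` is equivalent to `(y >> 48) u< 3`. No such narrowing exists for `ugt` (hence the movabsq in `test_ugt_add`), and the `eq`/`ne` cases already fold both constants into a single cmpq. A quick scalar check of the `ult` equivalence and the `ugt` counterexample (illustrative only):

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint64_t T = 3ULL << 48; // threshold with low 48 bits clear
      const uint64_t Samples[] = {0, T - 1, T, T + 1, ~uint64_t(0)};
      // ult narrows: y u< T  <=>  (y >> 48) u< 3.
      for (uint64_t Y : Samples)
        assert((Y < T) == ((Y >> 48) < 3));
      // ugt does not: Y = T + 1 satisfies Y u> T, yet (Y >> 48) u> 3 is false.
      const uint64_t Y = T + 1;
      assert(Y > T && !((Y >> 48) > 3));
      return 0;
    }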

From f53089995d91c9b423ae028c16f71e4de0a7b567 Mon Sep 17 00:00:00 2001
From: João Gouveia <jtalonegouveia at gmail.com>
Date: Sun, 9 Feb 2025 23:36:42 +0000
Subject: [PATCH 2/2] [X86] Fold some (truncate (srl (add X, C1), C2)) patterns
 to (add (truncate (srl X, C2)), C1')

C1' is smaller than C1, so in certain cases this avoids materializing large constants with MOVABS.
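
For the constants exercised by the tests, the rewrite works out as follows (a standalone sketch of the same arithmetic the combine performs with APInt):

    #include <cstdint>
    #include <cstdio>

    int main() {
      const uint64_t C1 = 0x000E000000000000ULL; // 3940649673949184
      const unsigned C2 = 48;
      // C1' = trunc_i32(~((~C1) >> C2)), as computed in combineSetCCTruncAdd.
      const uint32_t C1p = (uint32_t)~(~C1 >> C2);
      printf("C1' = 0x%08X (%d)\n", C1p, (int32_t)C1p); // 0xFFFF000E (-65522)
      return 0;
    }

The movabsq/addq/shrq/cmpl sequence therefore becomes shrq/addl/cmpl, with every constant fitting in a 32-bit immediate.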
---
 llvm/lib/Target/X86/X86ISelLowering.cpp       | 61 +++++++++++++++++++
 .../CodeGen/X86/combine-setcc-trunc-add.ll    | 35 +++++------
 2 files changed, 76 insertions(+), 20 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 9a916a663a64c2..fab1482b8675c0 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -48472,6 +48472,64 @@ static SDValue combineSetCCMOVMSK(SDValue EFLAGS, X86::CondCode &CC,
   return SDValue();
 }
 
+// Attempt to fold some (truncate (srl (add X, C1), C2)) patterns to
+// (add (truncate (srl X, C2)), C1'). C1' is smaller than C1, which lets us
+// avoid materializing large constants with MOVABS in certain cases.
+static SDValue combineSetCCTruncAdd(SDValue EFLAGS, X86::CondCode &CC,
+                                    SelectionDAG &DAG) {
+  if (!(CC == X86::COND_E || CC == X86::COND_NE || CC == X86::COND_AE ||
+        CC == X86::COND_B))
+    return SDValue();
+
+  EVT VT = EFLAGS.getValueType();
+  if (EFLAGS.getOpcode() == X86ISD::SUB && VT == MVT::i32) {
+    SDValue CmpLHS = EFLAGS.getOperand(0);
+    auto *CmpConstant = dyn_cast<ConstantSDNode>(EFLAGS.getOperand(1));
+
+    if (CmpLHS.getOpcode() != ISD::TRUNCATE || !CmpConstant)
+      return SDValue();
+
+    SDValue Srl = CmpLHS.getOperand(0);
+    EVT SrlVT = Srl.getValueType();
+    if (Srl.getOpcode() != ISD::SRL || SrlVT != MVT::i64)
+      return SDValue();
+
+    SDValue Add = Srl.getOperand(0);
+    // Avoid changing the ADD if it is used elsewhere.
+    if (Add.getOpcode() != ISD::ADD || !Add.hasOneUse())
+      return SDValue();
+
+    auto *AddConstant = dyn_cast<ConstantSDNode>(Add.getOperand(1));
+    auto *SrlConstant = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
+    if (!AddConstant || !SrlConstant)
+      return SDValue();
+
+    APInt AddConstVal = AddConstant->getAPIntValue();
+    APInt SrlConstVal = SrlConstant->getAPIntValue();
+    if (!SrlConstVal.ugt(VT.getSizeInBits()))
+      return SDValue();
+
+    APInt CmpConstVal = CmpConstant->getAPIntValue();
+    APInt ShiftedAddConst = AddConstVal.lshr(SrlConstVal);
+    if (!CmpConstVal.ult(ShiftedAddConst.trunc(VT.getSizeInBits())) ||
+        (ShiftedAddConst.shl(SrlConstVal)) != AddConstVal)
+      return SDValue();
+
+    SDLoc DL(EFLAGS);
+    SDValue AddLHSSrl =
+        DAG.getNode(ISD::SRL, DL, SrlVT, Add.getOperand(0), Srl.getOperand(1));
+    SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, AddLHSSrl);
+
+    APInt NewAddConstVal =
+        (~((~AddConstVal).lshr(SrlConstVal))).trunc(VT.getSizeInBits());
+    SDValue NewAddConst = DAG.getConstant(NewAddConstVal, DL, VT);
+    SDValue NewAddNode = DAG.getNode(ISD::ADD, DL, VT, Trunc, NewAddConst);
+    return DAG.getNode(X86ISD::CMP, DL, VT, NewAddNode, EFLAGS.getOperand(1));
+  }
+
+  return SDValue();
+}
+
 /// Optimize an EFLAGS definition used according to the condition code \p CC
 /// into a simpler EFLAGS value, potentially returning a new \p CC and replacing
 /// uses of chain values.
@@ -48494,6 +48552,9 @@ static SDValue combineSetCCEFLAGS(SDValue EFLAGS, X86::CondCode &CC,
   if (SDValue R = combineSetCCMOVMSK(EFLAGS, CC, DAG, Subtarget))
     return R;
 
+  if (SDValue R = combineSetCCTruncAdd(EFLAGS, CC, DAG))
+    return R;
+
   return combineSetCCAtomicArith(EFLAGS, CC, DAG, Subtarget);
 }
 
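Restated on plain integers, the legality guards in combineSetCCTruncAdd amount to the following (a sketch with a hypothetical `qualifies` helper, not code from the patch; the combine additionally requires the condition code to be E/NE/AE/B and the compared value to be an i32 truncate of an i64 srl with a one-use add):

    #include <cstdint>
    #include <cstdio>

    // Hypothetical restatement of the guards for the i64 -> i32 case: true if
    // a (trunc (srl (add X, C1), C2)) compared against K may be rewritten.
    static bool qualifies(uint64_t C1, unsigned C2, uint32_t K) {
      if (C2 <= 32 || C2 >= 64)    // > 32 per the combine; < 64 keeps the
        return false;              //   C++ shifts below well defined
      const uint64_t Shifted = C1 >> C2;
      if ((Shifted << C2) != C1)   // C1 must have no bits below C2; a carry
        return false;              //   out of the low bits would break the fold
      if (K >= (uint32_t)Shifted)  // cmp constant strictly below C1 >> C2
        return false;
      return true;
    }

    int main() {
      printf("%d\n", qualifies(0xE000000000000ULL, 48, 3));  // 1: the tested pattern
      printf("%d\n", qualifies(0xE000000000001ULL, 48, 3));  // 0: bits below the shift
      printf("%d\n", qualifies(0xE000000000000ULL, 48, 14)); // 0: cmp constant too large
      return 0;
    }
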
diff --git a/llvm/test/CodeGen/X86/combine-setcc-trunc-add.ll b/llvm/test/CodeGen/X86/combine-setcc-trunc-add.ll
index 67383427d96620..b84b256e7fa592 100644
--- a/llvm/test/CodeGen/X86/combine-setcc-trunc-add.ll
+++ b/llvm/test/CodeGen/X86/combine-setcc-trunc-add.ll
@@ -6,10 +6,9 @@
 define i1 @test_ult_trunc_add(i64 %x) {
 ; X64-LABEL: test_ult_trunc_add:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    movabsq $3940649673949184, %rax # imm = 0xE000000000000
-; X64-NEXT:    addq %rdi, %rax
-; X64-NEXT:    shrq $48, %rax
-; X64-NEXT:    cmpl $3, %eax
+; X64-NEXT:    shrq $48, %rdi
+; X64-NEXT:    addl $-65522, %edi # imm = 0xFFFF000E
+; X64-NEXT:    cmpl $3, %edi
 ; X64-NEXT:    setb %al
 ; X64-NEXT:    retq
 entry:
@@ -23,10 +22,9 @@ entry:
 define i1 @test_ult_add(i64 %x) {
 ; X64-LABEL: test_ult_add:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    movabsq $3940649673949184, %rax # imm = 0xE000000000000
-; X64-NEXT:    addq %rdi, %rax
-; X64-NEXT:    shrq $48, %rax
-; X64-NEXT:    cmpl $3, %eax
+; X64-NEXT:    shrq $48, %rdi
+; X64-NEXT:    addl $-65522, %edi # imm = 0xFFFF000E
+; X64-NEXT:    cmpl $3, %edi
 ; X64-NEXT:    setb %al
 ; X64-NEXT:    retq
 entry:
@@ -38,10 +36,9 @@ entry:
 define i1 @test_ugt_trunc_add(i64 %x) {
 ; X64-LABEL: test_ugt_trunc_add:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    movabsq $3940649673949184, %rax # imm = 0xE000000000000
-; X64-NEXT:    addq %rdi, %rax
-; X64-NEXT:    shrq $48, %rax
-; X64-NEXT:    cmpl $4, %eax
+; X64-NEXT:    shrq $48, %rdi
+; X64-NEXT:    addl $-65522, %edi # imm = 0xFFFF000E
+; X64-NEXT:    cmpl $4, %edi
 ; X64-NEXT:    setae %al
 ; X64-NEXT:    retq
 entry:
@@ -70,10 +67,9 @@ entry:
 define i1 @test_eq_trunc_add(i64 %x) {
 ; X64-LABEL: test_eq_trunc_add:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    movabsq $3940649673949184, %rax # imm = 0xE000000000000
-; X64-NEXT:    addq %rdi, %rax
-; X64-NEXT:    shrq $48, %rax
-; X64-NEXT:    cmpl $3, %eax
+; X64-NEXT:    shrq $48, %rdi
+; X64-NEXT:    addl $-65522, %edi # imm = 0xFFFF000E
+; X64-NEXT:    cmpl $3, %edi
 ; X64-NEXT:    sete %al
 ; X64-NEXT:    retq
 entry:
@@ -100,10 +96,9 @@ entry:
 define i1 @test_ne_trunc_add(i64 %x) {
 ; X64-LABEL: test_ne_trunc_add:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    movabsq $3940649673949184, %rax # imm = 0xE000000000000
-; X64-NEXT:    addq %rdi, %rax
-; X64-NEXT:    shrq $48, %rax
-; X64-NEXT:    cmpl $3, %eax
+; X64-NEXT:    shrq $48, %rdi
+; X64-NEXT:    addl $-65522, %edi # imm = 0xFFFF000E
+; X64-NEXT:    cmpl $3, %edi
 ; X64-NEXT:    setne %al
 ; X64-NEXT:    retq
 entry:



More information about the llvm-commits mailing list