[llvm] [X86] Fold some (setcc (sub (truncate (srl (add X, C1), C2)), C3), CC) patterns to (setcc (cmp (add (truncate (srl X, C2)), C1'), C3), CC) (PR #126448)

João Gouveia via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 10 13:35:21 PST 2025


https://github.com/joaotgouveia updated https://github.com/llvm/llvm-project/pull/126448

>From 3d735253520f81cc5f6211ad32500033247f04ee Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20Gouveia?= <jtalonegouveia at gmail.com>
Date: Sun, 9 Feb 2025 23:16:18 +0000
Subject: [PATCH 1/3] [X86] Add test coverage for some i64 cmp+srl+add cases

---
 .../CodeGen/X86/combine-setcc-trunc-add.ll    | 128 ++++++++++++++++++
 1 file changed, 128 insertions(+)
 create mode 100644 llvm/test/CodeGen/X86/combine-setcc-trunc-add.ll

diff --git a/llvm/test/CodeGen/X86/combine-setcc-trunc-add.ll b/llvm/test/CodeGen/X86/combine-setcc-trunc-add.ll
new file mode 100644
index 000000000000000..67383427d96620b
--- /dev/null
+++ b/llvm/test/CodeGen/X86/combine-setcc-trunc-add.ll
@@ -0,0 +1,128 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefixes=X64
+
+; Test for https://github.com/llvm/llvm-project/issues/123239
+
+define i1 @test_ult_trunc_add(i64 %x) {
+; X64-LABEL: test_ult_trunc_add:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    movabsq $3940649673949184, %rax # imm = 0xE000000000000
+; X64-NEXT:    addq %rdi, %rax
+; X64-NEXT:    shrq $48, %rax
+; X64-NEXT:    cmpl $3, %eax
+; X64-NEXT:    setb %al
+; X64-NEXT:    retq
+entry:
+  %add = add i64 %x, 3940649673949184
+  %shr = lshr i64 %add, 48
+  %conv = trunc i64 %shr to i32
+  %res = icmp ult i32 %conv, 3
+  ret i1 %res
+}
+
+define i1 @test_ult_add(i64 %x) {
+; X64-LABEL: test_ult_add:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    movabsq $3940649673949184, %rax # imm = 0xE000000000000
+; X64-NEXT:    addq %rdi, %rax
+; X64-NEXT:    shrq $48, %rax
+; X64-NEXT:    cmpl $3, %eax
+; X64-NEXT:    setb %al
+; X64-NEXT:    retq
+entry:
+    %0 = add i64 3940649673949184, %x
+    %1 = icmp ult i64 %0, 844424930131968
+    ret i1 %1
+}
+
+define i1 @test_ugt_trunc_add(i64 %x) {
+; X64-LABEL: test_ugt_trunc_add:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    movabsq $3940649673949184, %rax # imm = 0xE000000000000
+; X64-NEXT:    addq %rdi, %rax
+; X64-NEXT:    shrq $48, %rax
+; X64-NEXT:    cmpl $4, %eax
+; X64-NEXT:    setae %al
+; X64-NEXT:    retq
+entry:
+  %add = add i64 %x, 3940649673949184
+  %shr = lshr i64 %add, 48
+  %conv = trunc i64 %shr to i32
+  %res = icmp ugt i32 %conv, 3
+  ret i1 %res
+}
+
+define i1 @test_ugt_add(i64 %x) {
+; X64-LABEL: test_ugt_add:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    movabsq $3940649673949184, %rax # imm = 0xE000000000000
+; X64-NEXT:    addq %rdi, %rax
+; X64-NEXT:    movabsq $844424930131968, %rcx # imm = 0x3000000000000
+; X64-NEXT:    cmpq %rcx, %rax
+; X64-NEXT:    seta %al
+; X64-NEXT:    retq
+entry:
+    %0 = add i64 3940649673949184, %x
+    %1 = icmp ugt i64 %0, 844424930131968
+    ret i1 %1
+}
+
+define i1 @test_eq_trunc_add(i64 %x) {
+; X64-LABEL: test_eq_trunc_add:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    movabsq $3940649673949184, %rax # imm = 0xE000000000000
+; X64-NEXT:    addq %rdi, %rax
+; X64-NEXT:    shrq $48, %rax
+; X64-NEXT:    cmpl $3, %eax
+; X64-NEXT:    sete %al
+; X64-NEXT:    retq
+entry:
+  %add = add i64 %x, 3940649673949184
+  %shr = lshr i64 %add, 48
+  %conv = trunc i64 %shr to i32
+  %res = icmp eq i32 %conv, 3
+  ret i1 %res
+}
+
+define i1 @test_eq_add(i64 %x) {
+; X64-LABEL: test_eq_add:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    movabsq $-3096224743817216, %rax # imm = 0xFFF5000000000000
+; X64-NEXT:    cmpq %rax, %rdi
+; X64-NEXT:    sete %al
+; X64-NEXT:    retq
+entry:
+    %0 = add i64 3940649673949184, %x
+    %1 = icmp eq i64 %0, 844424930131968
+    ret i1 %1
+}
+
+define i1 @test_ne_trunc_add(i64 %x) {
+; X64-LABEL: test_ne_trunc_add:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    movabsq $3940649673949184, %rax # imm = 0xE000000000000
+; X64-NEXT:    addq %rdi, %rax
+; X64-NEXT:    shrq $48, %rax
+; X64-NEXT:    cmpl $3, %eax
+; X64-NEXT:    setne %al
+; X64-NEXT:    retq
+entry:
+  %add = add i64 %x, 3940649673949184
+  %shr = lshr i64 %add, 48
+  %conv = trunc i64 %shr to i32
+  %res = icmp ne i32 %conv, 3
+  ret i1 %res
+}
+
+define i1 @test_ne_add(i64 %x) {
+; X64-LABEL: test_ne_add:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    movabsq $-3096224743817216, %rax # imm = 0xFFF5000000000000
+; X64-NEXT:    cmpq %rax, %rdi
+; X64-NEXT:    setne %al
+; X64-NEXT:    retq
+entry:
+    %0 = add i64 3940649673949184, %x
+    %1 = icmp ne i64 %0, 844424930131968
+    ret i1 %1
+}

>From f53089995d91c9b423ae028c16f71e4de0a7b567 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20Gouveia?= <jtalonegouveia at gmail.com>
Date: Sun, 9 Feb 2025 23:36:42 +0000
Subject: [PATCH 2/3] [X86] Fold some (truncate (srl (add X, C1), C2)) patterns
 to (add (truncate (srl X, C2)), C1')

C1' will be smaller than C1, so we are able to avoid generating code with MOVABS and large constants in certain cases.
---
 llvm/lib/Target/X86/X86ISelLowering.cpp       | 61 +++++++++++++++++++
 .../CodeGen/X86/combine-setcc-trunc-add.ll    | 35 +++++------
 2 files changed, 76 insertions(+), 20 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 9a916a663a64c20..fab1482b8675c00 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -48472,6 +48472,64 @@ static SDValue combineSetCCMOVMSK(SDValue EFLAGS, X86::CondCode &CC,
   return SDValue();
 }
 
+// Attempt to fold some (truncate (srl (add X, C1), C2)) patterns to
+// (add (truncate (srl X, C2), C1')). C1' will be smaller than C1 so we are able
+// to avoid generating code with MOVABS and large constants in certain cases.
+static SDValue combineSetCCTruncAdd(SDValue EFLAGS, X86::CondCode &CC,
+                                    SelectionDAG &DAG) {
+  if (!(CC == X86::COND_E || CC == X86::COND_NE || CC == X86::COND_AE ||
+        CC == X86::COND_B))
+    return SDValue();
+
+  EVT VT = EFLAGS.getValueType();
+  if (EFLAGS.getOpcode() == X86ISD::SUB && VT == MVT::i32) {
+    SDValue CmpLHS = EFLAGS.getOperand(0);
+    auto *CmpConstant = dyn_cast<ConstantSDNode>(EFLAGS.getOperand(1));
+
+    if (CmpLHS.getOpcode() != ISD::TRUNCATE || !CmpConstant)
+      return SDValue();
+
+    SDValue Srl = CmpLHS.getOperand(0);
+    EVT SrlVT = Srl.getValueType();
+    if (Srl.getOpcode() != ISD::SRL || SrlVT != MVT::i64)
+      return SDValue();
+
+    SDValue Add = Srl.getOperand(0);
+    // Avoid changing the ADD if it is used elsewhere.
+    if (Add.getOpcode() != ISD::ADD || !Add.hasOneUse())
+      return SDValue();
+
+    auto *AddConstant = dyn_cast<ConstantSDNode>(Add.getOperand(1));
+    auto *SrlConstant = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
+    if (!AddConstant || !SrlConstant)
+      return SDValue();
+
+    APInt AddConstVal = AddConstant->getAPIntValue();
+    APInt SrlConstVal = SrlConstant->getAPIntValue();
+    if (!SrlConstVal.ugt(VT.getSizeInBits()))
+      return SDValue();
+
+    APInt CmpConstVal = CmpConstant->getAPIntValue();
+    APInt ShiftedAddConst = AddConstVal.lshr(SrlConstVal);
+    if (!CmpConstVal.ult(ShiftedAddConst.trunc(VT.getSizeInBits())) ||
+        (ShiftedAddConst.shl(SrlConstVal)) != AddConstVal)
+      return SDValue();
+
+    SDLoc DL(EFLAGS);
+    SDValue AddLHSSrl =
+        DAG.getNode(ISD::SRL, DL, SrlVT, Add.getOperand(0), Srl.getOperand(1));
+    SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, AddLHSSrl);
+
+    APInt NewAddConstVal =
+        (~((~AddConstVal).lshr(SrlConstVal))).trunc(VT.getSizeInBits());
+    SDValue NewAddConst = DAG.getConstant(NewAddConstVal, DL, VT);
+    SDValue NewAddNode = DAG.getNode(ISD::ADD, DL, VT, Trunc, NewAddConst);
+    return DAG.getNode(X86ISD::CMP, DL, VT, NewAddNode, EFLAGS.getOperand(1));
+  }
+
+  return SDValue();
+}
+
 /// Optimize an EFLAGS definition used according to the condition code \p CC
 /// into a simpler EFLAGS value, potentially returning a new \p CC and replacing
 /// uses of chain values.
@@ -48494,6 +48552,9 @@ static SDValue combineSetCCEFLAGS(SDValue EFLAGS, X86::CondCode &CC,
   if (SDValue R = combineSetCCMOVMSK(EFLAGS, CC, DAG, Subtarget))
     return R;
 
+  if (SDValue R = combineSetCCTruncAdd(EFLAGS, CC, DAG))
+    return R;
+
   return combineSetCCAtomicArith(EFLAGS, CC, DAG, Subtarget);
 }
 
diff --git a/llvm/test/CodeGen/X86/combine-setcc-trunc-add.ll b/llvm/test/CodeGen/X86/combine-setcc-trunc-add.ll
index 67383427d96620b..b84b256e7fa5928 100644
--- a/llvm/test/CodeGen/X86/combine-setcc-trunc-add.ll
+++ b/llvm/test/CodeGen/X86/combine-setcc-trunc-add.ll
@@ -6,10 +6,9 @@
 define i1 @test_ult_trunc_add(i64 %x) {
 ; X64-LABEL: test_ult_trunc_add:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    movabsq $3940649673949184, %rax # imm = 0xE000000000000
-; X64-NEXT:    addq %rdi, %rax
-; X64-NEXT:    shrq $48, %rax
-; X64-NEXT:    cmpl $3, %eax
+; X64-NEXT:    shrq $48, %rdi
+; X64-NEXT:    addl $-65522, %edi # imm = 0xFFFF000E
+; X64-NEXT:    cmpl $3, %edi
 ; X64-NEXT:    setb %al
 ; X64-NEXT:    retq
 entry:
@@ -23,10 +22,9 @@ entry:
 define i1 @test_ult_add(i64 %x) {
 ; X64-LABEL: test_ult_add:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    movabsq $3940649673949184, %rax # imm = 0xE000000000000
-; X64-NEXT:    addq %rdi, %rax
-; X64-NEXT:    shrq $48, %rax
-; X64-NEXT:    cmpl $3, %eax
+; X64-NEXT:    shrq $48, %rdi
+; X64-NEXT:    addl $-65522, %edi # imm = 0xFFFF000E
+; X64-NEXT:    cmpl $3, %edi
 ; X64-NEXT:    setb %al
 ; X64-NEXT:    retq
 entry:
@@ -38,10 +36,9 @@ entry:
 define i1 @test_ugt_trunc_add(i64 %x) {
 ; X64-LABEL: test_ugt_trunc_add:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    movabsq $3940649673949184, %rax # imm = 0xE000000000000
-; X64-NEXT:    addq %rdi, %rax
-; X64-NEXT:    shrq $48, %rax
-; X64-NEXT:    cmpl $4, %eax
+; X64-NEXT:    shrq $48, %rdi
+; X64-NEXT:    addl $-65522, %edi # imm = 0xFFFF000E
+; X64-NEXT:    cmpl $4, %edi
 ; X64-NEXT:    setae %al
 ; X64-NEXT:    retq
 entry:
@@ -70,10 +67,9 @@ entry:
 define i1 @test_eq_trunc_add(i64 %x) {
 ; X64-LABEL: test_eq_trunc_add:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    movabsq $3940649673949184, %rax # imm = 0xE000000000000
-; X64-NEXT:    addq %rdi, %rax
-; X64-NEXT:    shrq $48, %rax
-; X64-NEXT:    cmpl $3, %eax
+; X64-NEXT:    shrq $48, %rdi
+; X64-NEXT:    addl $-65522, %edi # imm = 0xFFFF000E
+; X64-NEXT:    cmpl $3, %edi
 ; X64-NEXT:    sete %al
 ; X64-NEXT:    retq
 entry:
@@ -100,10 +96,9 @@ entry:
 define i1 @test_ne_trunc_add(i64 %x) {
 ; X64-LABEL: test_ne_trunc_add:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    movabsq $3940649673949184, %rax # imm = 0xE000000000000
-; X64-NEXT:    addq %rdi, %rax
-; X64-NEXT:    shrq $48, %rax
-; X64-NEXT:    cmpl $3, %eax
+; X64-NEXT:    shrq $48, %rdi
+; X64-NEXT:    addl $-65522, %edi # imm = 0xFFFF000E
+; X64-NEXT:    cmpl $3, %edi
 ; X64-NEXT:    setne %al
 ; X64-NEXT:    retq
 entry:

>From a9d77fdead409876d252ae7cc45add19ec63e7d6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20Gouveia?= <jtalonegouveia at gmail.com>
Date: Mon, 10 Feb 2025 21:15:15 +0000
Subject: [PATCH 3/3] [X86] Address reviews

---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 81 +++++++++++--------------
 1 file changed, 36 insertions(+), 45 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index fab1482b8675c00..28bea48e7b5b784 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -48472,62 +48472,53 @@ static SDValue combineSetCCMOVMSK(SDValue EFLAGS, X86::CondCode &CC,
   return SDValue();
 }
 
-// Attempt to fold some (truncate (srl (add X, C1), C2)) patterns to
-// (add (truncate (srl X, C2), C1')). C1' will be smaller than C1 so we are able
-// to avoid generating code with MOVABS and large constants in certain cases.
+// Attempt to fold some (setcc (sub (truncate (srl (add X, C1), C2)), C3), CC)
+// patterns to (setcc (cmp (add (truncate (srl X, C2)), C1'), C3), CC). C1' will
+// be smaller than C1 so we are able to avoid generating code with MOVABS and
+// large constants in certain cases.
 static SDValue combineSetCCTruncAdd(SDValue EFLAGS, X86::CondCode &CC,
                                     SelectionDAG &DAG) {
+  using namespace llvm::SDPatternMatch;
   if (!(CC == X86::COND_E || CC == X86::COND_NE || CC == X86::COND_AE ||
         CC == X86::COND_B))
     return SDValue();
 
-  EVT VT = EFLAGS.getValueType();
-  if (EFLAGS.getOpcode() == X86ISD::SUB && VT == MVT::i32) {
-    SDValue CmpLHS = EFLAGS.getOperand(0);
-    auto *CmpConstant = dyn_cast<ConstantSDNode>(EFLAGS.getOperand(1));
-
-    if (CmpLHS.getOpcode() != ISD::TRUNCATE || !CmpConstant)
-      return SDValue();
-
-    SDValue Srl = CmpLHS.getOperand(0);
-    EVT SrlVT = Srl.getValueType();
-    if (Srl.getOpcode() != ISD::SRL || SrlVT != MVT::i64)
-      return SDValue();
-
-    SDValue Add = Srl.getOperand(0);
-    // Avoid changing the ADD if it is used elsewhere.
-    if (Add.getOpcode() != ISD::ADD || !Add.hasOneUse())
-      return SDValue();
-
-    auto *AddConstant = dyn_cast<ConstantSDNode>(Add.getOperand(1));
-    auto *SrlConstant = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
-    if (!AddConstant || !SrlConstant)
-      return SDValue();
+  SDValue AddLhs;
+  APInt AddConst, SrlConst, CmpConst;
+  if (!sd_match(EFLAGS,
+                m_AllOf(m_SpecificVT(MVT::i32),
+                        m_BinOp(X86ISD::SUB,
+                                m_Trunc(m_Srl(m_Add(m_Value(AddLhs),
+                                                    m_ConstInt(AddConst)),
+                                              m_ConstInt(SrlConst))),
+                                m_ConstInt(CmpConst)))))
+    return SDValue();
 
-    APInt AddConstVal = AddConstant->getAPIntValue();
-    APInt SrlConstVal = SrlConstant->getAPIntValue();
-    if (!SrlConstVal.ugt(VT.getSizeInBits()))
-      return SDValue();
+  SDValue Srl;
+  if (!sd_match(EFLAGS.getOperand(0).getOperand(0),
+                m_AllOf(m_SpecificVT(MVT::i64), m_Value(Srl))))
+    return SDValue();
 
-    APInt CmpConstVal = CmpConstant->getAPIntValue();
-    APInt ShiftedAddConst = AddConstVal.lshr(SrlConstVal);
-    if (!CmpConstVal.ult(ShiftedAddConst.trunc(VT.getSizeInBits())) ||
-        (ShiftedAddConst.shl(SrlConstVal)) != AddConstVal)
-      return SDValue();
+  // Avoid changing the ADD if it is used elsewhere.
+  if (!Srl.getOperand(0).hasOneUse())
+    return SDValue();
 
-    SDLoc DL(EFLAGS);
-    SDValue AddLHSSrl =
-        DAG.getNode(ISD::SRL, DL, SrlVT, Add.getOperand(0), Srl.getOperand(1));
-    SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, AddLHSSrl);
+  EVT VT = EFLAGS.getValueType();
+  APInt ShiftedAddConst = AddConst.lshr(SrlConst);
+  if (!CmpConst.ult(ShiftedAddConst.trunc(VT.getSizeInBits())) ||
+      (ShiftedAddConst.shl(SrlConst)) != AddConst)
+    return SDValue();
 
-    APInt NewAddConstVal =
-        (~((~AddConstVal).lshr(SrlConstVal))).trunc(VT.getSizeInBits());
-    SDValue NewAddConst = DAG.getConstant(NewAddConstVal, DL, VT);
-    SDValue NewAddNode = DAG.getNode(ISD::ADD, DL, VT, Trunc, NewAddConst);
-    return DAG.getNode(X86ISD::CMP, DL, VT, NewAddNode, EFLAGS.getOperand(1));
-  }
+  SDLoc DL(EFLAGS);
+  SDValue AddLHSSrl =
+      DAG.getNode(ISD::SRL, DL, MVT::i64, AddLhs, Srl.getOperand(1));
+  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, AddLHSSrl);
 
-  return SDValue();
+  APInt NewAddConstVal =
+      (~((~AddConst).lshr(SrlConst))).trunc(VT.getSizeInBits());
+  SDValue NewAddConst = DAG.getConstant(NewAddConstVal, DL, VT);
+  SDValue NewAddNode = DAG.getNode(ISD::ADD, DL, VT, Trunc, NewAddConst);
+  return DAG.getNode(X86ISD::CMP, DL, VT, NewAddNode, EFLAGS.getOperand(1));
 }
 
 /// Optimize an EFLAGS definition used according to the condition code \p CC



More information about the llvm-commits mailing list