[llvm] 0a913b5 - [X86] Fold some (truncate (srl (add X, C1), C2)) patterns to (add (truncate (srl X, C2)), C1') (#126448)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 21 01:17:12 PST 2025
Author: João Gouveia
Date: 2025-02-21T17:17:09+08:00
New Revision: 0a913b5e3a234d3261ccca54f9458715c00fae3b
URL: https://github.com/llvm/llvm-project/commit/0a913b5e3a234d3261ccca54f9458715c00fae3b
DIFF: https://github.com/llvm/llvm-project/commit/0a913b5e3a234d3261ccca54f9458715c00fae3b.diff
LOG: [X86] Fold some (truncate (srl (add X, C1), C2)) patterns to (add (truncate (srl X, C2)), C1') (#126448)
Addresses the poor codegen identified in #123239 and a few extra cases.
This transformation is correct for `eq` (https://alive2.llvm.org/ce/z/qZhwtT),
`ne` (https://alive2.llvm.org/ce/z/6gsmNz), `ult` (https://alive2.llvm.org/ce/z/xip_td)
and `ugt` (https://alive2.llvm.org/ce/z/39XQkX).
Fixes #123239
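For the constants exercised by the new tests, the folded immediate can be worked
out by hand. The sketch below is not part of the patch; it mirrors the
`NewAddConstVal` expression from `combinei64TruncSrlAdd` using plain 64-bit
integers in place of `APInt`:

  #include <cassert>
  #include <cstdint>
  #include <cstdio>

  int main() {
    // Constants taken from the tests: C1 = 3940649673949184, C2 = 48.
    const uint64_t C1 = 0xE000000000000ULL;
    const unsigned C2 = 48;

    // Preconditions enforced by the combine: the shift amount exceeds 31,
    // and the low C2 bits of C1 are zero, so adding C1 cannot carry out of
    // the bits discarded by the shift.
    assert(C2 > 31 && ((C1 >> C2) << C2) == C1);

    // C1' = trunc(~((~C1) >> C2)), mirroring the APInt expression.
    uint32_t NewC1 = (uint32_t)~((~C1) >> C2);
    printf("C1' = %#x (%d)\n", NewC1, (int32_t)NewC1); // 0xffff000e (-65522)
  }

This matches the `addl $-65522, %edi # imm = 0xFFFF000E` emitted in the tests
below.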
Added:
llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 157b5d40f15d1..683c8c3bdf96d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -53697,6 +53697,41 @@ static SDValue combineLRINT_LLRINT(SDNode *N, SelectionDAG &DAG,
DAG.getUNDEF(SrcVT)));
}
+// Attempt to fold some (truncate (srl (add X, C1), C2)) patterns to
+// (add (truncate (srl X, C2)), C1'). C1' will be smaller than C1 so we are able
+// to avoid generating code with MOVABS and large constants in certain cases.
+static SDValue combinei64TruncSrlAdd(SDValue N, EVT VT, SelectionDAG &DAG,
+ const SDLoc &DL) {
+ using namespace llvm::SDPatternMatch;
+
+ SDValue AddLhs;
+ APInt AddConst, SrlConst;
+ if (VT != MVT::i32 ||
+ !sd_match(N, m_AllOf(m_SpecificVT(MVT::i64),
+ m_Srl(m_OneUse(m_Add(m_Value(AddLhs),
+ m_ConstInt(AddConst))),
+ m_ConstInt(SrlConst)))))
+ return SDValue();
+
+ if (SrlConst.ule(31) || AddConst.lshr(SrlConst).shl(SrlConst) != AddConst)
+ return SDValue();
+
+ SDValue AddLHSSrl =
+ DAG.getNode(ISD::SRL, DL, MVT::i64, AddLhs, N.getOperand(1));
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, AddLHSSrl);
+
+ APInt NewAddConstVal =
+ (~((~AddConst).lshr(SrlConst))).trunc(VT.getSizeInBits());
+ SDValue NewAddConst = DAG.getConstant(NewAddConstVal, DL, VT);
+ SDValue NewAddNode = DAG.getNode(ISD::ADD, DL, VT, Trunc, NewAddConst);
+
+ APInt CleanupSizeConstVal = (SrlConst - 32).zextOrTrunc(VT.getSizeInBits());
+ EVT CleanUpVT =
+ EVT::getIntegerVT(*DAG.getContext(), CleanupSizeConstVal.getZExtValue());
+ SDValue CleanUp = DAG.getAnyExtOrTrunc(NewAddNode, DL, CleanUpVT);
+ return DAG.getAnyExtOrTrunc(CleanUp, DL, VT);
+}
+
/// Attempt to pre-truncate inputs to arithmetic ops if it will simplify
/// the codegen.
/// e.g. TRUNC( BINOP( X, Y ) ) --> BINOP( TRUNC( X ), TRUNC( Y ) )
@@ -53742,6 +53777,9 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
if (!Src.hasOneUse())
return SDValue();
+ if (SDValue R = combinei64TruncSrlAdd(Src, VT, DAG, DL))
+ return R;
+
// Only support vector truncation for now.
// TODO: i64 scalar math would benefit as well.
if (!VT.isVector())
diff --git a/llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll b/llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll
new file mode 100644
index 0000000000000..1ce1e7e1c2b9f
--- /dev/null
+++ b/llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll
@@ -0,0 +1,125 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefixes=X64
+
+; Test for https://github.com/llvm/llvm-project/issues/123239
+
+define i1 @test_ult_trunc_add(i64 %x) {
+; X64-LABEL: test_ult_trunc_add:
+; X64: # %bb.0:
+; X64-NEXT: shrq $48, %rdi
+; X64-NEXT: addl $-65522, %edi # imm = 0xFFFF000E
+; X64-NEXT: cmpl $3, %edi
+; X64-NEXT: setb %al
+; X64-NEXT: retq
+ %add = add i64 %x, 3940649673949184
+ %shr = lshr i64 %add, 48
+ %conv = trunc i64 %shr to i32
+ %res = icmp ult i32 %conv, 3
+ ret i1 %res
+}
+
+define i1 @test_ult_add(i64 %x) {
+; X64-LABEL: test_ult_add:
+; X64: # %bb.0:
+; X64-NEXT: shrq $48, %rdi
+; X64-NEXT: addl $-65522, %edi # imm = 0xFFFF000E
+; X64-NEXT: cmpl $3, %edi
+; X64-NEXT: setb %al
+; X64-NEXT: retq
+ %add = add i64 3940649673949184, %x
+ %cmp = icmp ult i64 %add, 844424930131968
+ ret i1 %cmp
+}
+
+define i1 @test_ugt_trunc_add(i64 %x) {
+; X64-LABEL: test_ugt_trunc_add:
+; X64: # %bb.0:
+; X64-NEXT: shrq $48, %rdi
+; X64-NEXT: addl $-65522, %edi # imm = 0xFFFF000E
+; X64-NEXT: cmpl $4, %edi
+; X64-NEXT: setae %al
+; X64-NEXT: retq
+ %add = add i64 %x, 3940649673949184
+ %shr = lshr i64 %add, 48
+ %conv = trunc i64 %shr to i32
+ %res = icmp ugt i32 %conv, 3
+ ret i1 %res
+}
+
+define i1 @test_ugt_add(i64 %x) {
+; X64-LABEL: test_ugt_add:
+; X64: # %bb.0:
+; X64-NEXT: movabsq $3940649673949184, %rax # imm = 0xE000000000000
+; X64-NEXT: addq %rdi, %rax
+; X64-NEXT: movabsq $844424930131968, %rcx # imm = 0x3000000000000
+; X64-NEXT: cmpq %rcx, %rax
+; X64-NEXT: seta %al
+; X64-NEXT: retq
+ %add = add i64 3940649673949184, %x
+ %cmp = icmp ugt i64 %add, 844424930131968
+ ret i1 %cmp
+}
+
+define i1 @test_eq_trunc_add(i64 %x) {
+; X64-LABEL: test_eq_trunc_add:
+; X64: # %bb.0:
+; X64-NEXT: shrq $48, %rdi
+; X64-NEXT: cmpl $65525, %edi # imm = 0xFFF5
+; X64-NEXT: sete %al
+; X64-NEXT: retq
+ %add = add i64 %x, 3940649673949184
+ %shr = lshr i64 %add, 48
+ %conv = trunc i64 %shr to i32
+ %res = icmp eq i32 %conv, 3
+ ret i1 %res
+}
+
+define i1 @test_eq_add(i64 %x) {
+; X64-LABEL: test_eq_add:
+; X64: # %bb.0:
+; X64-NEXT: movabsq $-3096224743817216, %rax # imm = 0xFFF5000000000000
+; X64-NEXT: cmpq %rax, %rdi
+; X64-NEXT: sete %al
+; X64-NEXT: retq
+ %add = add i64 3940649673949184, %x
+ %cmp = icmp eq i64 %add, 844424930131968
+ ret i1 %cmp
+}
+
+define i1 @test_ne_trunc_add(i64 %x) {
+; X64-LABEL: test_ne_trunc_add:
+; X64: # %bb.0:
+; X64-NEXT: shrq $48, %rdi
+; X64-NEXT: cmpl $65525, %edi # imm = 0xFFF5
+; X64-NEXT: setne %al
+; X64-NEXT: retq
+ %add = add i64 %x, 3940649673949184
+ %shr = lshr i64 %add, 48
+ %conv = trunc i64 %shr to i32
+ %res = icmp ne i32 %conv, 3
+ ret i1 %res
+}
+
+define i1 @test_ne_add(i64 %x) {
+; X64-LABEL: test_ne_add:
+; X64: # %bb.0:
+; X64-NEXT: movabsq $-3096224743817216, %rax # imm = 0xFFF5000000000000
+; X64-NEXT: cmpq %rax, %rdi
+; X64-NEXT: setne %al
+; X64-NEXT: retq
+ %add = add i64 3940649673949184, %x
+ %cmp = icmp ne i64 %add, 844424930131968
+ ret i1 %cmp
+}
+
+define i32 @test_trunc_add(i64 %x) {
+; X64-LABEL: test_trunc_add:
+; X64: # %bb.0:
+; X64-NEXT: shrq $48, %rdi
+; X64-NEXT: leal -65522(%rdi), %eax
+; X64-NEXT: retq
+ %add = add i64 %x, 3940649673949184
+ %shr = lshr i64 %add, 48
+ %conv = trunc i64 %shr to i32
+ ret i32 %conv
+}
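Beyond the alive2 proofs linked above, the `ult` case is easy to spot-check
exhaustively for these constants: because the low 48 bits of C1 are zero, only
the top 16 bits of X influence either side of the comparison. A standalone
brute-force check (an illustration, not part of the patch):

  #include <cstdint>
  #include <cstdio>

  int main() {
    const uint64_t C1 = 0xE000000000000ULL;
    // Sweep the top 16 bits of X; the low 48 bits cannot affect the result
    // because the low 48 bits of C1 are zero.
    for (uint64_t Hi = 0; Hi < (1ULL << 16); ++Hi) {
      uint64_t X = Hi << 48;
      uint32_t Before = (uint32_t)((X + C1) >> 48);       // original pattern
      uint32_t After = (uint32_t)(X >> 48) + 0xFFFF000EU; // folded pattern
      if ((Before < 3) != (After < 3)) {                  // icmp ult ..., 3
        printf("mismatch at Hi = %#llx\n", (unsigned long long)Hi);
        return 1;
      }
    }
    printf("ult case agrees for all 65536 high-16-bit values\n");
  }

The `After` expression is exactly the `shrq $48` / `addl $-65522` / `cmpl $3` /
`setb` sequence checked in test_ult_trunc_add above.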