[llvm] [X86] Extend `combinei64TruncSrlAdd` to handle patterns with `or` and `xor` (PR #128435)
João Gouveia via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 24 09:42:12 PST 2025
https://github.com/joaotgouveia updated https://github.com/llvm/llvm-project/pull/128435
>From fe8151f38832e8f6861d0e3783a34ca0357ee3c0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20Gouveia?= <jtalonegouveia at gmail.com>
Date: Sun, 23 Feb 2025 16:52:43 +0000
Subject: [PATCH 1/3] [X86] Add pre-commit tests.
---
.../CodeGen/X86/combine-i64-trunc-srl-add.ll | 59 ++++++++++++++++++-
1 file changed, 57 insertions(+), 2 deletions(-)
diff --git a/llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll b/llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll
index 14992ca5bf488..bd3c05a2c9302 100644
--- a/llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll
+++ b/llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll
@@ -128,6 +128,61 @@ define i32 @test_trunc_add(i64 %x) {
ret i32 %conv
}
+define i32 @test_trunc_sub(i64 %x) {
+; X64-LABEL: test_trunc_sub:
+; X64: # %bb.0:
+; X64-NEXT: shrq $48, %rdi
+; X64-NEXT: addl $65522, %edi # imm = 0xFFF2
+; X64-NEXT: movzwl %di, %eax
+; X64-NEXT: retq
+ %sub = sub i64 %x, 3940649673949184
+ %shr = lshr i64 %sub, 48
+ %conv = trunc i64 %shr to i32
+ ret i32 %conv
+}
+
+define i32 @test_trunc_and(i64 %x) {
+; X64-LABEL: test_trunc_and:
+; X64: # %bb.0:
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: shrq $48, %rax
+; X64-NEXT: andl $14, %eax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
+; X64-NEXT: retq
+ %and = and i64 %x, 3940649673949184
+ %shr = lshr i64 %and, 48
+ %conv = trunc i64 %shr to i32
+ ret i32 %conv
+}
+
+define i32 @test_trunc_or(i64 %x) {
+; X64-LABEL: test_trunc_or:
+; X64: # %bb.0:
+; X64-NEXT: movabsq $3940649673949184, %rax # imm = 0xE000000000000
+; X64-NEXT: orq %rdi, %rax
+; X64-NEXT: shrq $48, %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
+; X64-NEXT: retq
+ %or = or i64 %x, 3940649673949184
+ %shr = lshr i64 %or, 48
+ %conv = trunc i64 %shr to i32
+ ret i32 %conv
+}
+
+define i32 @test_trunc_xor(i64 %x) {
+; X64-LABEL: test_trunc_xor:
+; X64: # %bb.0:
+; X64-NEXT: movabsq $3940649673949184, %rax # imm = 0xE000000000000
+; X64-NEXT: xorq %rdi, %rax
+; X64-NEXT: shrq $48, %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
+; X64-NEXT: retq
+ %xor = xor i64 %x, 3940649673949184
+ %shr = lshr i64 %xor, 48
+ %conv = trunc i64 %shr to i32
+ ret i32 %conv
+}
+
; Make sure we don't crash on this test case.
define i32 @pr128158(i64 %x) {
@@ -137,10 +192,10 @@ define i32 @pr128158(i64 %x) {
; X64-NEXT: addq %rdi, %rax
; X64-NEXT: shrq $32, %rax
; X64-NEXT: .p2align 4
-; X64-NEXT: .LBB9_1: # %for.body
+; X64-NEXT: .LBB13_1: # %for.body
; X64-NEXT: # =>This Inner Loop Header: Depth=1
; X64-NEXT: cmpl $9, %eax
-; X64-NEXT: jb .LBB9_1
+; X64-NEXT: jb .LBB13_1
; X64-NEXT: # %bb.2: # %exit
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: retq
>From 5574dce692ba8c5218106950410655165a9977f5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20Gouveia?= <jtalonegouveia at gmail.com>
Date: Sun, 23 Feb 2025 19:18:11 +0000
Subject: [PATCH 2/3] [X86] Extend `combinei64TruncSrlAdd` to handle patterns with `or` and `xor`
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 48 ++++++++++---------
.../CodeGen/X86/combine-i64-trunc-srl-add.ll | 8 ++--
2 files changed, 30 insertions(+), 26 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c146e1e6c0334..47dc9ffb4b24d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -53733,36 +53733,42 @@ static SDValue combineLRINT_LLRINT(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(X86ISD::CVTP2SI, DL, VT, Src);
}
-// Attempt to fold some (truncate (srl (add X, C1), C2)) patterns to
-// (add (truncate (srl X, C2)), C1'). C1' will be smaller than C1 so we are able
-// to avoid generating code with MOVABS and large constants in certain cases.
-static SDValue combinei64TruncSrlAdd(SDValue N, EVT VT, SelectionDAG &DAG,
- const SDLoc &DL) {
+// Attempt to fold some (truncate (srl (binop X, C1), C2)) patterns, where
+// binop is add, or, or xor, to (binop (truncate (srl X, C2)), C1'). C1' will
+// be smaller than C1 so we are able to avoid generating code with MOVABS and
+// large constants in certain cases.
+static SDValue combinei64TruncSrlBinop(SDValue N, EVT VT, SelectionDAG &DAG,
+ const SDLoc &DL) {
using namespace llvm::SDPatternMatch;
- SDValue AddLhs;
- APInt AddConst, SrlConst;
+ SDValue BinopLhs;
+ APInt BinopConst, SrlConst;
if (VT != MVT::i32 ||
- !sd_match(N, m_AllOf(m_SpecificVT(MVT::i64),
- m_Srl(m_OneUse(m_Add(m_Value(AddLhs),
- m_ConstInt(AddConst))),
- m_ConstInt(SrlConst)))))
+ !sd_match(
+ N,
+ m_AllOf(m_SpecificVT(MVT::i64),
+ m_Srl(m_OneUse(m_AnyOf(
+ m_Add(m_Value(BinopLhs), m_ConstInt(BinopConst)),
+ m_Or(m_Value(BinopLhs), m_ConstInt(BinopConst)),
+ m_Xor(m_Value(BinopLhs), m_ConstInt(BinopConst)))),
+ m_ConstInt(SrlConst)))))
return SDValue();
- if (SrlConst.ule(32) || AddConst.countr_zero() < SrlConst.getZExtValue())
+ if (SrlConst.ule(32) || BinopConst.countr_zero() < SrlConst.getZExtValue())
return SDValue();
- SDValue AddLHSSrl =
- DAG.getNode(ISD::SRL, DL, MVT::i64, AddLhs, N.getOperand(1));
- SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, AddLHSSrl);
+ SDValue BinopLHSSrl =
+ DAG.getNode(ISD::SRL, DL, MVT::i64, BinopLhs, N.getOperand(1));
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, BinopLHSSrl);
- APInt NewAddConstVal = AddConst.lshr(SrlConst).trunc(VT.getSizeInBits());
- SDValue NewAddConst = DAG.getConstant(NewAddConstVal, DL, VT);
- SDValue NewAddNode = DAG.getNode(ISD::ADD, DL, VT, Trunc, NewAddConst);
+ APInt NewBinopConstVal = BinopConst.lshr(SrlConst).trunc(VT.getSizeInBits());
+ SDValue NewBinopConst = DAG.getConstant(NewBinopConstVal, DL, VT);
+ SDValue NewBinopNode =
+ DAG.getNode(N.getOperand(0).getOpcode(), DL, VT, Trunc, NewBinopConst);
EVT CleanUpVT =
EVT::getIntegerVT(*DAG.getContext(), 64 - SrlConst.getZExtValue());
- return DAG.getZeroExtendInReg(NewAddNode, DL, CleanUpVT);
+ return DAG.getZeroExtendInReg(NewBinopNode, DL, CleanUpVT);
}
/// Attempt to pre-truncate inputs to arithmetic ops if it will simplify
@@ -53810,11 +53816,9 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
if (!Src.hasOneUse())
return SDValue();
- if (SDValue R = combinei64TruncSrlAdd(Src, VT, DAG, DL))
+ if (SDValue R = combinei64TruncSrlBinop(Src, VT, DAG, DL))
return R;
- // Only support vector truncation for now.
- // TODO: i64 scalar math would benefit as well.
if (!VT.isVector())
return SDValue();
diff --git a/llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll b/llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll
index bd3c05a2c9302..ec29cf9d56c29 100644
--- a/llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll
+++ b/llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll
@@ -158,9 +158,9 @@ define i32 @test_trunc_and(i64 %x) {
define i32 @test_trunc_or(i64 %x) {
; X64-LABEL: test_trunc_or:
; X64: # %bb.0:
-; X64-NEXT: movabsq $3940649673949184, %rax # imm = 0xE000000000000
-; X64-NEXT: orq %rdi, %rax
+; X64-NEXT: movq %rdi, %rax
; X64-NEXT: shrq $48, %rax
+; X64-NEXT: orl $14, %eax
; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%or = or i64 %x, 3940649673949184
@@ -172,9 +172,9 @@ define i32 @test_trunc_or(i64 %x) {
define i32 @test_trunc_xor(i64 %x) {
; X64-LABEL: test_trunc_xor:
; X64: # %bb.0:
-; X64-NEXT: movabsq $3940649673949184, %rax # imm = 0xE000000000000
-; X64-NEXT: xorq %rdi, %rax
+; X64-NEXT: movq %rdi, %rax
; X64-NEXT: shrq $48, %rax
+; X64-NEXT: xorl $14, %eax
; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%xor = xor i64 %x, 3940649673949184
>From 55189cc558a626155225e519866675c37c394fad Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20Gouveia?= <jtalonegouveia at gmail.com>
Date: Mon, 24 Feb 2025 17:06:58 +0000
Subject: [PATCH 3/3] [X86] Add extra test cases
---
.../CodeGen/X86/combine-i64-trunc-srl-add.ll | 58 ++++++++++++++++---
1 file changed, 50 insertions(+), 8 deletions(-)
diff --git a/llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll b/llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll
index ec29cf9d56c29..785865bf06a74 100644
--- a/llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll
+++ b/llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll
@@ -141,8 +141,8 @@ define i32 @test_trunc_sub(i64 %x) {
ret i32 %conv
}
-define i32 @test_trunc_and(i64 %x) {
-; X64-LABEL: test_trunc_and:
+define i32 @test_trunc_and_1(i64 %x) {
+; X64-LABEL: test_trunc_and_1:
; X64: # %bb.0:
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: shrq $48, %rax
@@ -155,8 +155,8 @@ define i32 @test_trunc_and(i64 %x) {
ret i32 %conv
}
-define i32 @test_trunc_or(i64 %x) {
-; X64-LABEL: test_trunc_or:
+define i32 @test_trunc_or_1(i64 %x) {
+; X64-LABEL: test_trunc_or_1:
; X64: # %bb.0:
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: shrq $48, %rax
@@ -169,8 +169,8 @@ define i32 @test_trunc_or(i64 %x) {
ret i32 %conv
}
-define i32 @test_trunc_xor(i64 %x) {
-; X64-LABEL: test_trunc_xor:
+define i32 @test_trunc_xor_1(i64 %x) {
+; X64-LABEL: test_trunc_xor_1:
; X64: # %bb.0:
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: shrq $48, %rax
@@ -183,6 +183,48 @@ define i32 @test_trunc_xor(i64 %x) {
ret i32 %conv
}
+define i32 @test_trunc_and_2(i64 %x) {
+; X64-LABEL: test_trunc_and_2:
+; X64: # %bb.0:
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: shrq $48, %rax
+; X64-NEXT: andl $13, %eax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
+; X64-NEXT: retq
+ %and = and i64 %x, 3940649673949183
+ %shr = lshr i64 %and, 48
+ %conv = trunc i64 %shr to i32
+ ret i32 %conv
+}
+
+define i32 @test_trunc_or_2(i64 %x) {
+; X64-LABEL: test_trunc_or_2:
+; X64: # %bb.0:
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: shrq $48, %rax
+; X64-NEXT: orl $13, %eax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
+; X64-NEXT: retq
+ %or = or i64 %x, 3940649673949183
+ %shr = lshr i64 %or, 48
+ %conv = trunc i64 %shr to i32
+ ret i32 %conv
+}
+
+define i32 @test_trunc_xor_2(i64 %x) {
+; X64-LABEL: test_trunc_xor_2:
+; X64: # %bb.0:
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: shrq $48, %rax
+; X64-NEXT: xorl $13, %eax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
+; X64-NEXT: retq
+ %xor = xor i64 %x, 3940649673949183
+ %shr = lshr i64 %xor, 48
+ %conv = trunc i64 %shr to i32
+ ret i32 %conv
+}
+
; Make sure we don't crash on this test case.
define i32 @pr128158(i64 %x) {
@@ -192,10 +234,10 @@ define i32 @pr128158(i64 %x) {
; X64-NEXT: addq %rdi, %rax
; X64-NEXT: shrq $32, %rax
; X64-NEXT: .p2align 4
-; X64-NEXT: .LBB13_1: # %for.body
+; X64-NEXT: .LBB16_1: # %for.body
; X64-NEXT: # =>This Inner Loop Header: Depth=1
; X64-NEXT: cmpl $9, %eax
-; X64-NEXT: jb .LBB13_1
+; X64-NEXT: jb .LBB16_1
; X64-NEXT: # %bb.2: # %exit
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: retq
More information about the llvm-commits mailing list