[llvm] 0751418 - [X86] Extend `combinei64TruncSrlAdd` to handle patterns with `or` and `xor` (#128435)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Mar 1 02:05:53 PST 2025
Author: João Gouveia
Date: 2025-03-01T18:05:49+08:00
New Revision: 0751418024442ac97b8ff484c01f9386aa5723b8
URL: https://github.com/llvm/llvm-project/commit/0751418024442ac97b8ff484c01f9386aa5723b8
DIFF: https://github.com/llvm/llvm-project/commit/0751418024442ac97b8ff484c01f9386aa5723b8.diff
LOG: [X86] Extend `combinei64TruncSrlAdd` to handle patterns with `or` and `xor` (#128435)
As discussed in #126448, the fold implemented by #126448 / #128353 can
be extended to operations other than `add`. This patch extends the fold
performed by `combinei64TruncSrlAdd` to include `or` and `xor` (proof:
https://alive2.llvm.org/ce/z/AXuaQu). There's no need to extend it to
`sub` and `and`, as similar folds are already being performed for those
operations.
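For a concrete feel for the win, here is a minimal standalone sketch (plain C++, not LLVM code; the constant and shift amount are taken from the new test_trunc_or_1 test below) of how a 64-bit immediate that would otherwise need a MOVABS shrinks to a small 32-bit immediate:

#include <cstdint>
#include <cstdio>

int main() {
  const uint64_t C1 = 3940649673949184ULL; // 0xE000000000000, needs a MOVABS
  const unsigned C2 = 50;                  // srl amount, must be > 32

  // After the fold, the immediate fits comfortably in 32 bits.
  const uint32_t C1Prime = static_cast<uint32_t>(C1 >> C2);
  std::printf("C1' = %u\n", C1Prime); // prints 3, matching "orl $3, %eax"

  // Spot-check the 'or' equivalence for one value of x.
  const uint64_t X = 0x123456789ABCDEF0ULL;
  const uint32_t Before = static_cast<uint32_t>((X | C1) >> C2);
  const uint32_t After  = static_cast<uint32_t>(X >> C2) | C1Prime;
  std::printf("%s\n", Before == After ? "match" : "mismatch");
  return 0;
}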
CC: @phoebewang @RKSimon
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 12636f22d8409..30e661957d774 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -53788,36 +53788,35 @@ static SDValue combineLRINT_LLRINT(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(X86ISD::CVTP2SI, DL, VT, Src);
}
-// Attempt to fold some (truncate (srl (add X, C1), C2)) patterns to
-// (add (truncate (srl X, C2)), C1'). C1' will be smaller than C1 so we are able
-// to avoid generating code with MOVABS and large constants in certain cases.
-static SDValue combinei64TruncSrlAdd(SDValue N, EVT VT, SelectionDAG &DAG,
- const SDLoc &DL) {
- using namespace llvm::SDPatternMatch;
+// Attempt to fold some (truncate (srl (add/or/xor X, C1), C2)) patterns to
+// (add/or/xor (truncate (srl X, C2)), C1'). C1' will be smaller than C1 so we
+// are able to avoid generating code with MOVABS and large constants in certain
+// cases.
+static SDValue combinei64TruncSrlConstant(SDValue N, EVT VT, SelectionDAG &DAG,
+ const SDLoc &DL) {
- SDValue AddLhs;
- APInt AddConst, SrlConst;
- if (VT != MVT::i32 ||
- !sd_match(N, m_AllOf(m_SpecificVT(MVT::i64),
- m_Srl(m_OneUse(m_Add(m_Value(AddLhs),
- m_ConstInt(AddConst))),
- m_ConstInt(SrlConst)))))
- return SDValue();
+ SDValue Op = N.getOperand(0);
+ APInt OpConst = Op.getConstantOperandAPInt(1);
+ APInt SrlConst = N.getConstantOperandAPInt(1);
+ uint64_t SrlConstVal = SrlConst.getZExtValue();
+ unsigned Opcode = Op.getOpcode();
- if (SrlConst.ule(32) || AddConst.countr_zero() < SrlConst.getZExtValue())
+ if (SrlConst.ule(32) ||
+ (Opcode == ISD::ADD && OpConst.countr_zero() < SrlConstVal))
return SDValue();
- SDValue AddLHSSrl =
- DAG.getNode(ISD::SRL, DL, MVT::i64, AddLhs, N.getOperand(1));
- SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, AddLHSSrl);
+ SDValue OpLhsSrl =
+ DAG.getNode(ISD::SRL, DL, MVT::i64, Op.getOperand(0), N.getOperand(1));
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, OpLhsSrl);
- APInt NewAddConstVal = AddConst.lshr(SrlConst).trunc(VT.getSizeInBits());
- SDValue NewAddConst = DAG.getConstant(NewAddConstVal, DL, VT);
- SDValue NewAddNode = DAG.getNode(ISD::ADD, DL, VT, Trunc, NewAddConst);
+ APInt NewOpConstVal = OpConst.lshr(SrlConst).trunc(VT.getSizeInBits());
+ SDValue NewOpConst = DAG.getConstant(NewOpConstVal, DL, VT);
+ SDValue NewOpNode = DAG.getNode(Opcode, DL, VT, Trunc, NewOpConst);
+ EVT CleanUpVT = EVT::getIntegerVT(*DAG.getContext(), 64 - SrlConstVal);
- EVT CleanUpVT =
- EVT::getIntegerVT(*DAG.getContext(), 64 - SrlConst.getZExtValue());
- return DAG.getZeroExtendInReg(NewAddNode, DL, CleanUpVT);
+ if (Opcode == ISD::ADD)
+ return DAG.getZeroExtendInReg(NewOpNode, DL, CleanUpVT);
+ return NewOpNode;
}
/// Attempt to pre-truncate inputs to arithmetic ops if it will simplify
@@ -53865,11 +53864,21 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
if (!Src.hasOneUse())
return SDValue();
- if (SDValue R = combinei64TruncSrlAdd(Src, VT, DAG, DL))
- return R;
+ if (VT == MVT::i32 && SrcVT == MVT::i64 && SrcOpcode == ISD::SRL &&
+ isa<ConstantSDNode>(Src.getOperand(1))) {
+
+ unsigned SrcOpOpcode = Src.getOperand(0).getOpcode();
+ if ((SrcOpOpcode != ISD::ADD && SrcOpOpcode != ISD::OR &&
+ SrcOpOpcode != ISD::XOR) ||
+ !isa<ConstantSDNode>(Src.getOperand(0).getOperand(1)))
+ return SDValue();
+
+ if (SDValue R = combinei64TruncSrlConstant(Src, VT, DAG, DL))
+ return R;
+
+ return SDValue();
+ }
- // Only support vector truncation for now.
- // TODO: i64 scalar math would benefit as well.
if (!VT.isVector())
return SDValue();
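A subtlety worth spelling out about the change above: for or/xor, bit i of ((x op C1) >> C2) is exactly bit i of ((x >> C2) op (C1 >> C2)), so combinei64TruncSrlConstant can return NewOpNode directly. For add, the narrowed addition can carry past bit 63 - C2, which is why only that case goes through getZeroExtendInReg. A small hand-written check of that corner case (standalone C++; values chosen for illustration, not taken from the patch):

#include <cassert>
#include <cstdint>

int main() {
  const unsigned C2 = 49;                    // srl amount > 32
  const uint64_t C1 = 0xC000000000000000ULL; // countr_zero(C1) = 62 >= C2
  const uint64_t X  = ~0ULL;                 // worst case for carries

  const uint64_t Ref    = (X + C1) >> C2;    // the original 64-bit computation
  const uint32_t Narrow = static_cast<uint32_t>(X >> C2) +
                          static_cast<uint32_t>(C1 >> C2);
  const uint32_t Mask   = (1u << (64 - C2)) - 1; // keep 64 - C2 = 15 bits

  assert(static_cast<uint32_t>(Ref) != Narrow);          // the carry escapes...
  assert(static_cast<uint32_t>(Ref) == (Narrow & Mask)); // ...and the mask fixes it
  return 0;
}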
diff --git a/llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll b/llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll
index 14992ca5bf488..f7906e5a009ae 100644
--- a/llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll
+++ b/llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll
@@ -128,6 +128,103 @@ define i32 @test_trunc_add(i64 %x) {
ret i32 %conv
}
+define i32 @test_trunc_sub(i64 %x) {
+; X64-LABEL: test_trunc_sub:
+; X64: # %bb.0:
+; X64-NEXT: shrq $49, %rdi
+; X64-NEXT: leal 32762(%rdi), %eax
+; X64-NEXT: andl $32767, %eax # imm = 0x7FFF
+; X64-NEXT: retq
+ %sub = sub i64 %x, 3377699720527872
+ %shr = lshr i64 %sub, 49
+ %conv = trunc i64 %shr to i32
+ ret i32 %conv
+}
+
+define i32 @test_trunc_and_1(i64 %x) {
+; X64-LABEL: test_trunc_and_1:
+; X64: # %bb.0:
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: shrq $50, %rax
+; X64-NEXT: andl $3, %eax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
+; X64-NEXT: retq
+ %and = and i64 %x, 3940649673949184
+ %shr = lshr i64 %and, 50
+ %conv = trunc i64 %shr to i32
+ ret i32 %conv
+}
+
+define i32 @test_trunc_or_1(i64 %x) {
+; X64-LABEL: test_trunc_or_1:
+; X64: # %bb.0:
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: shrq $50, %rax
+; X64-NEXT: orl $3, %eax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
+; X64-NEXT: retq
+ %or = or i64 %x, 3940649673949184
+ %shr = lshr i64 %or, 50
+ %conv = trunc i64 %shr to i32
+ ret i32 %conv
+}
+
+define i32 @test_trunc_xor_1(i64 %x) {
+; X64-LABEL: test_trunc_xor_1:
+; X64: # %bb.0:
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: shrq $50, %rax
+; X64-NEXT: xorl $3, %eax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
+; X64-NEXT: retq
+ %xor = xor i64 %x, 3940649673949184
+ %shr = lshr i64 %xor, 50
+ %conv = trunc i64 %shr to i32
+ ret i32 %conv
+}
+
+define i32 @test_trunc_and_2(i64 %x) {
+; X64-LABEL: test_trunc_and_2:
+; X64: # %bb.0:
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: shrq $45, %rax
+; X64-NEXT: andl $111, %eax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
+; X64-NEXT: retq
+ %and = and i64 %x, 3940649673949183
+ %shr = lshr i64 %and, 45
+ %conv = trunc i64 %shr to i32
+ ret i32 %conv
+}
+
+define i32 @test_trunc_or_2(i64 %x) {
+; X64-LABEL: test_trunc_or_2:
+; X64: # %bb.0:
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: shrq $45, %rax
+; X64-NEXT: orl $111, %eax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
+; X64-NEXT: retq
+ %or = or i64 %x, 3940649673949183
+ %shr = lshr i64 %or, 45
+ %conv = trunc i64 %shr to i32
+ ret i32 %conv
+}
+
+define i32 @test_trunc_xor_2(i64 %x) {
+; X64-LABEL: test_trunc_xor_2:
+; X64: # %bb.0:
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: shrq $45, %rax
+; X64-NEXT: xorl $111, %eax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
+; X64-NEXT: retq
+ %xor = xor i64 %x, 3940649673949183
+ %shr = lshr i64 %xor, 45
+ %conv = trunc i64 %shr to i32
+ ret i32 %conv
+}
+
; Make sure we don't crash on this test case.
define i32 @pr128158(i64 %x) {
@@ -137,10 +234,10 @@ define i32 @pr128158(i64 %x) {
; X64-NEXT: addq %rdi, %rax
; X64-NEXT: shrq $32, %rax
; X64-NEXT: .p2align 4
-; X64-NEXT: .LBB9_1: # %for.body
+; X64-NEXT: .LBB16_1: # %for.body
; X64-NEXT: # =>This Inner Loop Header: Depth=1
; X64-NEXT: cmpl $9, %eax
-; X64-NEXT: jb .LBB9_1
+; X64-NEXT: jb .LBB16_1
; X64-NEXT: # %bb.2: # %exit
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: retq
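A side note on the commit message's point about sub: test_trunc_sub above still gets the small-immediate form because the DAG combiner canonicalizes sub x, C into add x, -C (a standard DAGCombiner fold), after which the add path of this combine applies. A hypothetical standalone computation (plain C++, not LLVM code) of the immediates seen in that test:

#include <cstdint>
#include <cstdio>

int main() {
  // From test_trunc_sub: sub i64 %x, 3377699720527872, then lshr by 49.
  const uint64_t C  = 3377699720527872ULL; // 0xC000000000000
  const unsigned C2 = 49;

  const uint64_t NegC = 0ULL - C;          // sub x, C == add x, -C (mod 2^64)
  const uint32_t Imm  = static_cast<uint32_t>(NegC >> C2);
  const uint32_t Mask = (1u << (64 - C2)) - 1;

  std::printf("leal %u(%%rdi), %%eax\n", Imm); // prints 32762
  std::printf("andl $%u, %%eax\n", Mask);      // prints 32767
  return 0;
}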