[llvm] 8e2f649 - [DAGCombiner] Do not always fold FREEZE over BUILD_VECTOR (#85932)
Bjorn Pettersson via llvm-commits
llvm-commits@lists.llvm.org
Fri Apr 26 04:53:08 PDT 2024
Author: Bjorn Pettersson
Date: 2024-04-26T13:41:21+02:00
New Revision: 8e2f6495c0bac1dd6ee32b6a0d24152c9c343624
URL: https://github.com/llvm/llvm-project/commit/8e2f6495c0bac1dd6ee32b6a0d24152c9c343624
DIFF: https://github.com/llvm/llvm-project/commit/8e2f6495c0bac1dd6ee32b6a0d24152c9c343624.diff
LOG: [DAGCombiner] Do not always fold FREEZE over BUILD_VECTOR (#85932)
Avoid turning a BUILD_VECTOR that can be recognized as "all zeros",
"all ones" or "constant" into something that depends on
freeze(undef), as that would destroy those properties.

Instead, replace the undef elements with 0 or -1 in such vectors,
making it possible to fold away the freeze. We use -1 if the
BUILD_VECTOR would identify as "all ones", and the value 0 otherwise.
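
For illustration only (not part of the commit): below is a minimal
standalone C++ sketch of the lane-replacement rule described above. It
does not use the SelectionDAG API; the names freezeConstantVector and
Lane are invented for the example, std::nullopt stands in for an undef
lane, and the all-ones check only loosely mirrors
ISD::isBuildVectorAllOnes in that undef lanes are ignored.

  // Standalone sketch of the lane rule: in a constant vector with some
  // undef lanes, put -1 in the undef lanes if the defined lanes are all
  // ones, and 0 otherwise, so the result is still a plain constant and
  // the freeze can be dropped.
  #include <cassert>
  #include <cstdint>
  #include <optional>
  #include <vector>

  using Lane = std::optional<int32_t>; // std::nullopt models an undef lane

  std::vector<int32_t> freezeConstantVector(const std::vector<Lane> &V) {
    // Loosely mirrors ISD::isBuildVectorAllOnes: undef lanes are ignored.
    bool AllOnes = true;
    for (const Lane &L : V)
      if (L && *L != -1)
        AllOnes = false;
    std::vector<int32_t> Out;
    for (const Lane &L : V)
      Out.push_back(L ? *L : (AllOnes ? -1 : 0));
    return Out;
  }

  int main() {
    // <1, 2, 3, undef>  ->  <1, 2, 3, 0>
    assert((freezeConstantVector({1, 2, 3, std::nullopt}) ==
            std::vector<int32_t>{1, 2, 3, 0}));
    // <-1, undef, -1, -1>  ->  <-1, -1, -1, -1>
    assert((freezeConstantVector({-1, std::nullopt, -1, -1}) ==
            std::vector<int32_t>{-1, -1, -1, -1}));
    return 0;
  }

In the actual patch the same idea is expressed with
DAG.getAllOnesConstant() for the all-ones case and DAG.getConstant(0,
...) for the remaining undef lanes, as shown in the DAGCombiner.cpp
hunk below.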
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/test/CodeGen/X86/freeze-binary.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index aa746f1c7b7b3b..f984d4b3959648 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -15452,6 +15452,26 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) {
N0.getOpcode() == ISD::BUILD_PAIR ||
N0.getOpcode() == ISD::CONCAT_VECTORS;
+ // Avoid turning a BUILD_VECTOR that can be recognized as "all zeros", "all
+ // ones" or "constant" into something that depends on FrozenUndef. We can
+ // instead pick undef values to keep those properties, while at the same time
+ // folding away the freeze.
+ // If we implement a more general solution for folding away freeze(undef) in
+ // the future, then this special handling can be removed.
+ if (N0.getOpcode() == ISD::BUILD_VECTOR) {
+ SDLoc DL(N0);
+ MVT VT = N0.getSimpleValueType();
+ if (llvm::ISD::isBuildVectorAllOnes(N0.getNode()))
+ return DAG.getAllOnesConstant(DL, VT);
+ if (llvm::ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
+ SmallVector<SDValue, 8> NewVecC;
+ for (const SDValue &Op : N0->op_values())
+ NewVecC.push_back(
+ Op.isUndef() ? DAG.getConstant(0, DL, Op.getValueType()) : Op);
+ return DAG.getBuildVector(VT, DL, NewVecC);
+ }
+ }
+
SmallSetVector<SDValue, 8> MaybePoisonOperands;
for (SDValue Op : N0->ops()) {
if (DAG.isGuaranteedNotToBeUndefOrPoison(Op, /*PoisonOnly*/ false,
diff --git a/llvm/test/CodeGen/X86/freeze-binary.ll b/llvm/test/CodeGen/X86/freeze-binary.ll
index d75fc5318bd8aa..b212e9438e1b52 100644
--- a/llvm/test/CodeGen/X86/freeze-binary.ll
+++ b/llvm/test/CodeGen/X86/freeze-binary.ll
@@ -202,27 +202,13 @@ define <4 x i32> @freeze_add_vec(<4 x i32> %a0) nounwind {
define <4 x i32> @freeze_add_vec_undef(<4 x i32> %a0) nounwind {
; X86-LABEL: freeze_add_vec_undef:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebp
-; X86-NEXT: movl %esp, %ebp
-; X86-NEXT: andl $-16, %esp
-; X86-NEXT: subl $32, %esp
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $3, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $2, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $1, (%esp)
-; X86-NEXT: paddd (%esp), %xmm0
; X86-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-NEXT: movl %ebp, %esp
-; X86-NEXT: popl %ebp
+; X86-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: freeze_add_vec_undef:
; X64: # %bb.0:
-; X64-NEXT: movabsq $8589934593, %rax # imm = 0x200000001
-; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
-; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
-; X64-NEXT: movl $3, -{{[0-9]+}}(%rsp)
-; X64-NEXT: vpaddd -{{[0-9]+}}(%rsp), %xmm0, %xmm0
+; X64-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT: retq
%x = add <4 x i32> %a0, <i32 1, i32 2, i32 3, i32 undef>
@@ -287,27 +273,13 @@ define <4 x i32> @freeze_sub_vec(<4 x i32> %a0) nounwind {
define <4 x i32> @freeze_sub_vec_undef(<4 x i32> %a0) nounwind {
; X86-LABEL: freeze_sub_vec_undef:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebp
-; X86-NEXT: movl %esp, %ebp
-; X86-NEXT: andl $-16, %esp
-; X86-NEXT: subl $32, %esp
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $3, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $2, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $1, (%esp)
-; X86-NEXT: psubd (%esp), %xmm0
; X86-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-NEXT: movl %ebp, %esp
-; X86-NEXT: popl %ebp
+; X86-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: freeze_sub_vec_undef:
; X64: # %bb.0:
-; X64-NEXT: movabsq $8589934593, %rax # imm = 0x200000001
-; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
-; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
-; X64-NEXT: movl $3, -{{[0-9]+}}(%rsp)
-; X64-NEXT: vpsubd -{{[0-9]+}}(%rsp), %xmm0, %xmm0
+; X64-NEXT: vpsubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT: vpsubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT: retq
%x = sub <4 x i32> %a0, <i32 1, i32 2, i32 3, i32 undef>
@@ -373,29 +345,13 @@ define <8 x i16> @freeze_mul_vec(<8 x i16> %a0) nounwind {
define <8 x i16> @freeze_mul_vec_undef(<8 x i16> %a0) nounwind {
; X86-LABEL: freeze_mul_vec_undef:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebp
-; X86-NEXT: movl %esp, %ebp
-; X86-NEXT: andl $-16, %esp
-; X86-NEXT: subl $32, %esp
-; X86-NEXT: movw %ax, {{[0-9]+}}(%esp)
-; X86-NEXT: movw $1, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $196612, {{[0-9]+}}(%esp) # imm = 0x30004
-; X86-NEXT: movl $262147, {{[0-9]+}}(%esp) # imm = 0x40003
-; X86-NEXT: movl $131073, (%esp) # imm = 0x20001
-; X86-NEXT: pmullw (%esp), %xmm0
; X86-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-NEXT: movl %ebp, %esp
-; X86-NEXT: popl %ebp
+; X86-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: freeze_mul_vec_undef:
; X64: # %bb.0:
-; X64-NEXT: movabsq $1125912791875585, %rax # imm = 0x4000300020001
-; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
-; X64-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
-; X64-NEXT: movw $1, -{{[0-9]+}}(%rsp)
-; X64-NEXT: movl $196612, -{{[0-9]+}}(%rsp) # imm = 0x30004
-; X64-NEXT: vpmullw -{{[0-9]+}}(%rsp), %xmm0, %xmm0
+; X64-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT: retq
%x = mul <8 x i16> %a0, <i16 1, i16 2, i16 3, i16 4, i16 4, i16 3, i16 undef, i16 1>