[llvm] 8e2f649 - [DAGCombiner] Do not always fold FREEZE over BUILD_VECTOR (#85932)
Bjorn Pettersson via llvm-commits
llvm-commits@lists.llvm.org
Fri Apr 26 04:53:08 PDT 2024
Author: Bjorn Pettersson
Date: 2024-04-26T13:41:21+02:00
New Revision: 8e2f6495c0bac1dd6ee32b6a0d24152c9c343624
URL: https://github.com/llvm/llvm-project/commit/8e2f6495c0bac1dd6ee32b6a0d24152c9c343624
DIFF: https://github.com/llvm/llvm-project/commit/8e2f6495c0bac1dd6ee32b6a0d24152c9c343624.diff
LOG: [DAGCombiner] Do not always fold FREEZE over BUILD_VECTOR (#85932)
Avoid turning a BUILD_VECTOR that can be recognized as "all zeros",
"all ones" or "constant" into something that depends on
freeze(undef), as that would destroy those properties.

Instead, replace the undef elements with 0 or -1 in such vectors,
making it possible to fold away the freeze. We use -1 if the
BUILD_VECTOR would identify as "all ones", and the value 0 otherwise.
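
For illustration only (not part of the commit): below is a minimal
standalone C++ sketch of the lane-replacement rule described above. It
does not use the SelectionDAG API; the names freezeConstantVector and
Lane are invented for the example, std::nullopt stands in for an undef
lane, and the all-ones check only loosely mirrors
ISD::isBuildVectorAllOnes in that undef lanes are ignored.

  // Standalone sketch of the lane rule: in a constant vector with some
  // undef lanes, put -1 in the undef lanes if the defined lanes are all
  // ones, and 0 otherwise, so the result is still a plain constant and
  // the freeze can be dropped.
  #include <cassert>
  #include <cstdint>
  #include <optional>
  #include <vector>

  using Lane = std::optional<int32_t>; // std::nullopt models an undef lane

  std::vector<int32_t> freezeConstantVector(const std::vector<Lane> &V) {
    // Loosely mirrors ISD::isBuildVectorAllOnes: undef lanes are ignored.
    bool AllOnes = true;
    for (const Lane &L : V)
      if (L && *L != -1)
        AllOnes = false;
    std::vector<int32_t> Out;
    for (const Lane &L : V)
      Out.push_back(L ? *L : (AllOnes ? -1 : 0));
    return Out;
  }

  int main() {
    // <1, 2, 3, undef>  ->  <1, 2, 3, 0>
    assert((freezeConstantVector({1, 2, 3, std::nullopt}) ==
            std::vector<int32_t>{1, 2, 3, 0}));
    // <-1, undef, -1, -1>  ->  <-1, -1, -1, -1>
    assert((freezeConstantVector({-1, std::nullopt, -1, -1}) ==
            std::vector<int32_t>{-1, -1, -1, -1}));
    return 0;
  }

In the actual patch the same idea is expressed with
DAG.getAllOnesConstant() for the all-ones case and DAG.getConstant(0,
...) for the remaining undef lanes, as shown in the DAGCombiner.cpp
hunk below.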
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/test/CodeGen/X86/freeze-binary.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index aa746f1c7b7b3b..f984d4b3959648 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -15452,6 +15452,26 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) {
N0.getOpcode() == ISD::BUILD_PAIR ||
N0.getOpcode() == ISD::CONCAT_VECTORS;
+ // Avoid turning a BUILD_VECTOR that can be recognized as "all zeros", "all
+ // ones" or "constant" into something that depends on FrozenUndef. We can
+ // instead pick undef values to keep those properties, while at the same time
+ // folding away the freeze.
+ // If we implement a more general solution for folding away freeze(undef) in
+ // the future, then this special handling can be removed.
+ if (N0.getOpcode() == ISD::BUILD_VECTOR) {
+ SDLoc DL(N0);
+ MVT VT = N0.getSimpleValueType();
+ if (llvm::ISD::isBuildVectorAllOnes(N0.getNode()))
+ return DAG.getAllOnesConstant(DL, VT);
+ if (llvm::ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
+ SmallVector<SDValue, 8> NewVecC;
+ for (const SDValue &Op : N0->op_values())
+ NewVecC.push_back(
+ Op.isUndef() ? DAG.getConstant(0, DL, Op.getValueType()) : Op);
+ return DAG.getBuildVector(VT, DL, NewVecC);
+ }
+ }
+
SmallSetVector<SDValue, 8> MaybePoisonOperands;
for (SDValue Op : N0->ops()) {
if (DAG.isGuaranteedNotToBeUndefOrPoison(Op, /*PoisonOnly*/ false,
diff --git a/llvm/test/CodeGen/X86/freeze-binary.ll b/llvm/test/CodeGen/X86/freeze-binary.ll
index d75fc5318bd8aa..b212e9438e1b52 100644
--- a/llvm/test/CodeGen/X86/freeze-binary.ll
+++ b/llvm/test/CodeGen/X86/freeze-binary.ll
@@ -202,27 +202,13 @@ define <4 x i32> @freeze_add_vec(<4 x i32> %a0) nounwind {
define <4 x i32> @freeze_add_vec_undef(<4 x i32> %a0) nounwind {
; X86-LABEL: freeze_add_vec_undef:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebp
-; X86-NEXT: movl %esp, %ebp
-; X86-NEXT: andl $-16, %esp
-; X86-NEXT: subl $32, %esp
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $3, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $2, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $1, (%esp)
-; X86-NEXT: paddd (%esp), %xmm0
; X86-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-NEXT: movl %ebp, %esp
-; X86-NEXT: popl %ebp
+; X86-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: freeze_add_vec_undef:
; X64: # %bb.0:
-; X64-NEXT: movabsq $8589934593, %rax # imm = 0x200000001
-; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
-; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
-; X64-NEXT: movl $3, -{{[0-9]+}}(%rsp)
-; X64-NEXT: vpaddd -{{[0-9]+}}(%rsp), %xmm0, %xmm0
+; X64-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT: retq
%x = add <4 x i32> %a0, <i32 1, i32 2, i32 3, i32 undef>
@@ -287,27 +273,13 @@ define <4 x i32> @freeze_sub_vec(<4 x i32> %a0) nounwind {
define <4 x i32> @freeze_sub_vec_undef(<4 x i32> %a0) nounwind {
; X86-LABEL: freeze_sub_vec_undef:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebp
-; X86-NEXT: movl %esp, %ebp
-; X86-NEXT: andl $-16, %esp
-; X86-NEXT: subl $32, %esp
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $3, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $2, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $1, (%esp)
-; X86-NEXT: psubd (%esp), %xmm0
; X86-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-NEXT: movl %ebp, %esp
-; X86-NEXT: popl %ebp
+; X86-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: freeze_sub_vec_undef:
; X64: # %bb.0:
-; X64-NEXT: movabsq $8589934593, %rax # imm = 0x200000001
-; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
-; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
-; X64-NEXT: movl $3, -{{[0-9]+}}(%rsp)
-; X64-NEXT: vpsubd -{{[0-9]+}}(%rsp), %xmm0, %xmm0
+; X64-NEXT: vpsubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT: vpsubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT: retq
%x = sub <4 x i32> %a0, <i32 1, i32 2, i32 3, i32 undef>
@@ -373,29 +345,13 @@ define <8 x i16> @freeze_mul_vec(<8 x i16> %a0) nounwind {
define <8 x i16> @freeze_mul_vec_undef(<8 x i16> %a0) nounwind {
; X86-LABEL: freeze_mul_vec_undef:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebp
-; X86-NEXT: movl %esp, %ebp
-; X86-NEXT: andl $-16, %esp
-; X86-NEXT: subl $32, %esp
-; X86-NEXT: movw %ax, {{[0-9]+}}(%esp)
-; X86-NEXT: movw $1, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $196612, {{[0-9]+}}(%esp) # imm = 0x30004
-; X86-NEXT: movl $262147, {{[0-9]+}}(%esp) # imm = 0x40003
-; X86-NEXT: movl $131073, (%esp) # imm = 0x20001
-; X86-NEXT: pmullw (%esp), %xmm0
; X86-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-NEXT: movl %ebp, %esp
-; X86-NEXT: popl %ebp
+; X86-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: freeze_mul_vec_undef:
; X64: # %bb.0:
-; X64-NEXT: movabsq $1125912791875585, %rax # imm = 0x4000300020001
-; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
-; X64-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
-; X64-NEXT: movw $1, -{{[0-9]+}}(%rsp)
-; X64-NEXT: movl $196612, -{{[0-9]+}}(%rsp) # imm = 0x30004
-; X64-NEXT: vpmullw -{{[0-9]+}}(%rsp), %xmm0, %xmm0
+; X64-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT: retq
%x = mul <8 x i16> %a0, <i16 1, i16 2, i16 3, i16 4, i16 4, i16 3, i16 undef, i16 1>