[llvm] 1085b70 - [DAG] Don't fold (zext (bitop (load x), cst)) -> (bitop (zextload x), (zext cst)) if the zext is free

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Sat Nov 4 08:32:52 PDT 2023


Author: Simon Pilgrim
Date: 2023-11-04T15:32:13Z
New Revision: 1085b70a94d34a645d97990e1ac213882af5128e

URL: https://github.com/llvm/llvm-project/commit/1085b70a94d34a645d97990e1ac213882af5128e
DIFF: https://github.com/llvm/llvm-project/commit/1085b70a94d34a645d97990e1ac213882af5128e.diff

LOG: [DAG] Don't fold (zext (bitop (load x), cst)) -> (bitop (zextload x), (zext cst)) if the zext is free

Prevents an infinite loop when we've been trying to narrow the bitop to a more preferable type.
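
For context, a minimal sketch of the guarded fold as it reads after this
change (simplified from DAGCombiner::visitZERO_EXTEND; N0, VT, TLI and
LegalOperations are the surrounding variables in that function, and the
body of the if is elided):

    // fold (zext (and/or/xor (load x), cst)) ->
    //      (and/or/xor (zextload x), (zext cst))
    // Skip the fold when the target reports the zext as free: in that
    // case a competing combine may narrow the bitop back down to the
    // smaller, more preferable type, recreating the input pattern and
    // ping-ponging with this fold forever.
    if (ISD::isBitwiseLogicOp(N0.getOpcode()) && !TLI.isZExtFree(N0, VT) &&
        isa<LoadSDNode>(N0.getOperand(0)) &&
        N0.getOperand(1).getOpcode() == ISD::Constant &&
        (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
      // ... create the zextload and rebuild the logic op in VT ...
    }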

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/test/CodeGen/X86/wide-scalar-shift-by-byte-multiple-legalization.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 8c1282274372088..bee50d58c73c32c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -13717,8 +13717,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
   // fold (zext (and/or/xor (load x), cst)) ->
   //      (and/or/xor (zextload x), (zext cst))
   // Unless (and (load x) cst) will match as a zextload already and has
-  // additional users.
-  if (ISD::isBitwiseLogicOp(N0.getOpcode()) &&
+  // additional users, or the zext is already free.
+  if (ISD::isBitwiseLogicOp(N0.getOpcode()) && !TLI.isZExtFree(N0, VT) &&
       isa<LoadSDNode>(N0.getOperand(0)) &&
       N0.getOperand(1).getOpcode() == ISD::Constant &&
       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {

diff --git a/llvm/test/CodeGen/X86/wide-scalar-shift-by-byte-multiple-legalization.ll b/llvm/test/CodeGen/X86/wide-scalar-shift-by-byte-multiple-legalization.ll
index d6716d0edff40cd..e5affd86312efd3 100644
--- a/llvm/test/CodeGen/X86/wide-scalar-shift-by-byte-multiple-legalization.ll
+++ b/llvm/test/CodeGen/X86/wide-scalar-shift-by-byte-multiple-legalization.ll
@@ -2367,25 +2367,24 @@ define void @shl_64bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
 define void @ashr_64bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
 ; X64-SSE2-LABEL: ashr_64bytes:
 ; X64-SSE2:       # %bb.0:
-; X64-SSE2-NEXT:    pushq %r14
 ; X64-SSE2-NEXT:    pushq %rbx
-; X64-SSE2-NEXT:    movq (%rdi), %rcx
-; X64-SSE2-NEXT:    movq 8(%rdi), %r8
-; X64-SSE2-NEXT:    movq 16(%rdi), %r9
-; X64-SSE2-NEXT:    movq 24(%rdi), %r10
-; X64-SSE2-NEXT:    movq 32(%rdi), %r11
-; X64-SSE2-NEXT:    movq 40(%rdi), %rbx
-; X64-SSE2-NEXT:    movq 48(%rdi), %r14
+; X64-SSE2-NEXT:    movq (%rdi), %rax
+; X64-SSE2-NEXT:    movq 8(%rdi), %rcx
+; X64-SSE2-NEXT:    movq 16(%rdi), %r8
+; X64-SSE2-NEXT:    movq 24(%rdi), %r9
+; X64-SSE2-NEXT:    movq 32(%rdi), %r10
+; X64-SSE2-NEXT:    movq 40(%rdi), %r11
+; X64-SSE2-NEXT:    movq 48(%rdi), %rbx
 ; X64-SSE2-NEXT:    movq 56(%rdi), %rdi
-; X64-SSE2-NEXT:    movl (%rsi), %eax
+; X64-SSE2-NEXT:    movl (%rsi), %esi
 ; X64-SSE2-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp)
-; X64-SSE2-NEXT:    movq %r14, -{{[0-9]+}}(%rsp)
 ; X64-SSE2-NEXT:    movq %rbx, -{{[0-9]+}}(%rsp)
 ; X64-SSE2-NEXT:    movq %r11, -{{[0-9]+}}(%rsp)
 ; X64-SSE2-NEXT:    movq %r10, -{{[0-9]+}}(%rsp)
 ; X64-SSE2-NEXT:    movq %r9, -{{[0-9]+}}(%rsp)
 ; X64-SSE2-NEXT:    movq %r8, -{{[0-9]+}}(%rsp)
 ; X64-SSE2-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp)
+; X64-SSE2-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
 ; X64-SSE2-NEXT:    sarq $63, %rdi
 ; X64-SSE2-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp)
 ; X64-SSE2-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp)
@@ -2395,25 +2394,24 @@ define void @ashr_64bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
 ; X64-SSE2-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp)
 ; X64-SSE2-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp)
 ; X64-SSE2-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp)
-; X64-SSE2-NEXT:    andl $63, %eax
-; X64-SSE2-NEXT:    movq -128(%rsp,%rax), %rcx
-; X64-SSE2-NEXT:    movq -120(%rsp,%rax), %rsi
-; X64-SSE2-NEXT:    movq -104(%rsp,%rax), %rdi
-; X64-SSE2-NEXT:    movq -112(%rsp,%rax), %r8
-; X64-SSE2-NEXT:    movq -88(%rsp,%rax), %r9
-; X64-SSE2-NEXT:    movq -96(%rsp,%rax), %r10
-; X64-SSE2-NEXT:    movq -72(%rsp,%rax), %r11
-; X64-SSE2-NEXT:    movq -80(%rsp,%rax), %rax
-; X64-SSE2-NEXT:    movq %rax, 48(%rdx)
+; X64-SSE2-NEXT:    andl $63, %esi
+; X64-SSE2-NEXT:    movq -128(%rsp,%rsi), %rax
+; X64-SSE2-NEXT:    movq -120(%rsp,%rsi), %rcx
+; X64-SSE2-NEXT:    movq -104(%rsp,%rsi), %rdi
+; X64-SSE2-NEXT:    movq -112(%rsp,%rsi), %r8
+; X64-SSE2-NEXT:    movq -88(%rsp,%rsi), %r9
+; X64-SSE2-NEXT:    movq -96(%rsp,%rsi), %r10
+; X64-SSE2-NEXT:    movq -72(%rsp,%rsi), %r11
+; X64-SSE2-NEXT:    movq -80(%rsp,%rsi), %rsi
+; X64-SSE2-NEXT:    movq %rsi, 48(%rdx)
 ; X64-SSE2-NEXT:    movq %r11, 56(%rdx)
 ; X64-SSE2-NEXT:    movq %r10, 32(%rdx)
 ; X64-SSE2-NEXT:    movq %r9, 40(%rdx)
 ; X64-SSE2-NEXT:    movq %r8, 16(%rdx)
 ; X64-SSE2-NEXT:    movq %rdi, 24(%rdx)
-; X64-SSE2-NEXT:    movq %rcx, (%rdx)
-; X64-SSE2-NEXT:    movq %rsi, 8(%rdx)
+; X64-SSE2-NEXT:    movq %rax, (%rdx)
+; X64-SSE2-NEXT:    movq %rcx, 8(%rdx)
 ; X64-SSE2-NEXT:    popq %rbx
-; X64-SSE2-NEXT:    popq %r14
 ; X64-SSE2-NEXT:    retq
 ;
 ; X64-SSE42-LABEL: ashr_64bytes:
@@ -2443,9 +2441,9 @@ define void @ashr_64bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
 ; X64-SSE42-NEXT:    movups -112(%rsp,%rsi), %xmm1
 ; X64-SSE42-NEXT:    movups -96(%rsp,%rsi), %xmm2
 ; X64-SSE42-NEXT:    movups -80(%rsp,%rsi), %xmm3
+; X64-SSE42-NEXT:    movups %xmm3, 48(%rdx)
 ; X64-SSE42-NEXT:    movups %xmm1, 16(%rdx)
 ; X64-SSE42-NEXT:    movups %xmm2, 32(%rdx)
-; X64-SSE42-NEXT:    movups %xmm3, 48(%rdx)
 ; X64-SSE42-NEXT:    movups %xmm0, (%rdx)
 ; X64-SSE42-NEXT:    retq
 ;
@@ -2474,9 +2472,9 @@ define void @ashr_64bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
 ; X64-AVX-NEXT:    vmovups -112(%rsp,%rsi), %xmm1
 ; X64-AVX-NEXT:    vmovups -96(%rsp,%rsi), %xmm2
 ; X64-AVX-NEXT:    vmovups -80(%rsp,%rsi), %xmm3
+; X64-AVX-NEXT:    vmovups %xmm3, 48(%rdx)
 ; X64-AVX-NEXT:    vmovups %xmm1, 16(%rdx)
 ; X64-AVX-NEXT:    vmovups %xmm2, 32(%rdx)
-; X64-AVX-NEXT:    vmovups %xmm3, 48(%rdx)
 ; X64-AVX-NEXT:    vmovups %xmm0, (%rdx)
 ; X64-AVX-NEXT:    vzeroupper
 ; X64-AVX-NEXT:    retq
@@ -2772,5 +2770,5 @@ define void @ashr_64bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
 ; FALLBACK7: {{.*}}
 ; FALLBACK8: {{.*}}
 ; FALLBACK9: {{.*}}
-; X86: {{.*}}
 ; X64: {{.*}}
+; X86: {{.*}}

More information about the llvm-commits mailing list