[llvm] 1085b70 - [DAG] Don't fold (zext (bitop (load x), cst)) -> (bitop (zextload x), (zext cst)) if the zext is free
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sat Nov 4 08:32:52 PDT 2023
Author: Simon Pilgrim
Date: 2023-11-04T15:32:13Z
New Revision: 1085b70a94d34a645d97990e1ac213882af5128e
URL: https://github.com/llvm/llvm-project/commit/1085b70a94d34a645d97990e1ac213882af5128e
DIFF: https://github.com/llvm/llvm-project/commit/1085b70a94d34a645d97990e1ac213882af5128e.diff
LOG: [DAG] Don't fold (zext (bitop (load x), cst)) -> (bitop (zextload x), (zext cst)) if the zext is free
Prevents an infinite loop when we've been trying to narrow the bitop to a more preferable type.
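For context, a minimal sketch of the guarded combine as it stands after this patch (simplified from DAGCombiner::visitZERO_EXTEND; N0 is the zext's source operand and VT the extended result type, matching the names in the surrounding code, with the fold body itself elided):

    // fold (zext (and/or/xor (load x), cst)) ->
    //      (and/or/xor (zextload x), (zext cst))
    if (ISD::isBitwiseLogicOp(N0.getOpcode()) &&
        !TLI.isZExtFree(N0, VT) && // NEW: if the zext is free, a target
                                   // combine may narrow the bitop straight
                                   // back, ping-ponging with this fold.
        isa<LoadSDNode>(N0.getOperand(0)) &&
        N0.getOperand(1).getOpcode() == ISD::Constant &&
        (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
      // ... build the zextload and the extended bitwise op ...
    }

The new isZExtFree check is the whole fix: when widening costs nothing the fold gains nothing, and skipping it breaks the cycle with any target combine that prefers the narrower bitop.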
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/test/CodeGen/X86/wide-scalar-shift-by-byte-multiple-legalization.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 8c1282274372088..bee50d58c73c32c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -13717,8 +13717,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
// fold (zext (and/or/xor (load x), cst)) ->
// (and/or/xor (zextload x), (zext cst))
// Unless (and (load x) cst) will match as a zextload already and has
- // additional users.
- if (ISD::isBitwiseLogicOp(N0.getOpcode()) &&
+ // additional users, or the zext is already free.
+ if (ISD::isBitwiseLogicOp(N0.getOpcode()) && !TLI.isZExtFree(N0, VT) &&
isa<LoadSDNode>(N0.getOperand(0)) &&
N0.getOperand(1).getOpcode() == ISD::Constant &&
(!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
diff --git a/llvm/test/CodeGen/X86/wide-scalar-shift-by-byte-multiple-legalization.ll b/llvm/test/CodeGen/X86/wide-scalar-shift-by-byte-multiple-legalization.ll
index d6716d0edff40cd..e5affd86312efd3 100644
--- a/llvm/test/CodeGen/X86/wide-scalar-shift-by-byte-multiple-legalization.ll
+++ b/llvm/test/CodeGen/X86/wide-scalar-shift-by-byte-multiple-legalization.ll
@@ -2367,25 +2367,24 @@ define void @shl_64bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
define void @ashr_64bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; X64-SSE2-LABEL: ashr_64bytes:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pushq %r14
; X64-SSE2-NEXT: pushq %rbx
-; X64-SSE2-NEXT: movq (%rdi), %rcx
-; X64-SSE2-NEXT: movq 8(%rdi), %r8
-; X64-SSE2-NEXT: movq 16(%rdi), %r9
-; X64-SSE2-NEXT: movq 24(%rdi), %r10
-; X64-SSE2-NEXT: movq 32(%rdi), %r11
-; X64-SSE2-NEXT: movq 40(%rdi), %rbx
-; X64-SSE2-NEXT: movq 48(%rdi), %r14
+; X64-SSE2-NEXT: movq (%rdi), %rax
+; X64-SSE2-NEXT: movq 8(%rdi), %rcx
+; X64-SSE2-NEXT: movq 16(%rdi), %r8
+; X64-SSE2-NEXT: movq 24(%rdi), %r9
+; X64-SSE2-NEXT: movq 32(%rdi), %r10
+; X64-SSE2-NEXT: movq 40(%rdi), %r11
+; X64-SSE2-NEXT: movq 48(%rdi), %rbx
; X64-SSE2-NEXT: movq 56(%rdi), %rdi
-; X64-SSE2-NEXT: movl (%rsi), %eax
+; X64-SSE2-NEXT: movl (%rsi), %esi
; X64-SSE2-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
-; X64-SSE2-NEXT: movq %r14, -{{[0-9]+}}(%rsp)
; X64-SSE2-NEXT: movq %rbx, -{{[0-9]+}}(%rsp)
; X64-SSE2-NEXT: movq %r11, -{{[0-9]+}}(%rsp)
; X64-SSE2-NEXT: movq %r10, -{{[0-9]+}}(%rsp)
; X64-SSE2-NEXT: movq %r9, -{{[0-9]+}}(%rsp)
; X64-SSE2-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
; X64-SSE2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
+; X64-SSE2-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
; X64-SSE2-NEXT: sarq $63, %rdi
; X64-SSE2-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
; X64-SSE2-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
@@ -2395,25 +2394,24 @@ define void @ashr_64bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; X64-SSE2-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
; X64-SSE2-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
; X64-SSE2-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
-; X64-SSE2-NEXT: andl $63, %eax
-; X64-SSE2-NEXT: movq -128(%rsp,%rax), %rcx
-; X64-SSE2-NEXT: movq -120(%rsp,%rax), %rsi
-; X64-SSE2-NEXT: movq -104(%rsp,%rax), %rdi
-; X64-SSE2-NEXT: movq -112(%rsp,%rax), %r8
-; X64-SSE2-NEXT: movq -88(%rsp,%rax), %r9
-; X64-SSE2-NEXT: movq -96(%rsp,%rax), %r10
-; X64-SSE2-NEXT: movq -72(%rsp,%rax), %r11
-; X64-SSE2-NEXT: movq -80(%rsp,%rax), %rax
-; X64-SSE2-NEXT: movq %rax, 48(%rdx)
+; X64-SSE2-NEXT: andl $63, %esi
+; X64-SSE2-NEXT: movq -128(%rsp,%rsi), %rax
+; X64-SSE2-NEXT: movq -120(%rsp,%rsi), %rcx
+; X64-SSE2-NEXT: movq -104(%rsp,%rsi), %rdi
+; X64-SSE2-NEXT: movq -112(%rsp,%rsi), %r8
+; X64-SSE2-NEXT: movq -88(%rsp,%rsi), %r9
+; X64-SSE2-NEXT: movq -96(%rsp,%rsi), %r10
+; X64-SSE2-NEXT: movq -72(%rsp,%rsi), %r11
+; X64-SSE2-NEXT: movq -80(%rsp,%rsi), %rsi
+; X64-SSE2-NEXT: movq %rsi, 48(%rdx)
; X64-SSE2-NEXT: movq %r11, 56(%rdx)
; X64-SSE2-NEXT: movq %r10, 32(%rdx)
; X64-SSE2-NEXT: movq %r9, 40(%rdx)
; X64-SSE2-NEXT: movq %r8, 16(%rdx)
; X64-SSE2-NEXT: movq %rdi, 24(%rdx)
-; X64-SSE2-NEXT: movq %rcx, (%rdx)
-; X64-SSE2-NEXT: movq %rsi, 8(%rdx)
+; X64-SSE2-NEXT: movq %rax, (%rdx)
+; X64-SSE2-NEXT: movq %rcx, 8(%rdx)
; X64-SSE2-NEXT: popq %rbx
-; X64-SSE2-NEXT: popq %r14
; X64-SSE2-NEXT: retq
;
; X64-SSE42-LABEL: ashr_64bytes:
@@ -2443,9 +2441,9 @@ define void @ashr_64bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; X64-SSE42-NEXT: movups -112(%rsp,%rsi), %xmm1
; X64-SSE42-NEXT: movups -96(%rsp,%rsi), %xmm2
; X64-SSE42-NEXT: movups -80(%rsp,%rsi), %xmm3
+; X64-SSE42-NEXT: movups %xmm3, 48(%rdx)
; X64-SSE42-NEXT: movups %xmm1, 16(%rdx)
; X64-SSE42-NEXT: movups %xmm2, 32(%rdx)
-; X64-SSE42-NEXT: movups %xmm3, 48(%rdx)
; X64-SSE42-NEXT: movups %xmm0, (%rdx)
; X64-SSE42-NEXT: retq
;
@@ -2474,9 +2472,9 @@ define void @ashr_64bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; X64-AVX-NEXT: vmovups -112(%rsp,%rsi), %xmm1
; X64-AVX-NEXT: vmovups -96(%rsp,%rsi), %xmm2
; X64-AVX-NEXT: vmovups -80(%rsp,%rsi), %xmm3
+; X64-AVX-NEXT: vmovups %xmm3, 48(%rdx)
; X64-AVX-NEXT: vmovups %xmm1, 16(%rdx)
; X64-AVX-NEXT: vmovups %xmm2, 32(%rdx)
-; X64-AVX-NEXT: vmovups %xmm3, 48(%rdx)
; X64-AVX-NEXT: vmovups %xmm0, (%rdx)
; X64-AVX-NEXT: vzeroupper
; X64-AVX-NEXT: retq
@@ -2772,5 +2770,5 @@ define void @ashr_64bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; FALLBACK7: {{.*}}
; FALLBACK8: {{.*}}
; FALLBACK9: {{.*}}
-; X86: {{.*}}
; X64: {{.*}}
+; X86: {{.*}}