[llvm] 41466a1 - [SelectionDAG] Correct the implementation of m_AllOnes. (#90776)
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Wed May 1 14:55:18 PDT 2024
Author: Craig Topper
Date: 2024-05-01T14:55:14-07:00
New Revision: 41466a177a95ee6ff699d190f7625f0b32922a20
URL: https://github.com/llvm/llvm-project/commit/41466a177a95ee6ff699d190f7625f0b32922a20
DIFF: https://github.com/llvm/llvm-project/commit/41466a177a95ee6ff699d190f7625f0b32922a20.diff
LOG: [SelectionDAG] Correct the implementation of m_AllOnes. (#90776)
Previously we used SpecificInt_match, which created a 64-bit APInt
containing all ones. This was then checked against other constants
using APInt::isSameValue.
If the constants have different bit widths, APInt::isSameValue zero
extends the narrower constant to make the widths match. This means that
for any constant narrower than 64 bits, m_AllOnes was guaranteed to fail,
since the zero-extended value would no longer be all ones.
I think it would also incorrectly consider an i128 with 64 leading zeros
and 64 trailing ones as matching m_AllOnes.
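To illustrate the mismatch, here is a minimal standalone sketch (using only
llvm/ADT/APInt.h; this is not part of the patch) of how APInt::isSameValue
behaves across bit widths:

#include "llvm/ADT/APInt.h"
#include <cassert>

int main() {
  using llvm::APInt;
  // The old matcher effectively compared against a 64-bit all-ones APInt.
  APInt AllOnes64 = APInt::getAllOnes(64);

  // isSameValue zero extends the narrower operand before comparing, so a
  // 32-bit all-ones constant becomes 0x00000000FFFFFFFF at 64 bits and the
  // comparison fails.
  assert(!APInt::isSameValue(AllOnes64, APInt::getAllOnes(32)));

  // Conversely, an i128 value with 64 leading zeros and 64 trailing ones
  // compares equal to the 64-bit all-ones pattern, even though it is not
  // all ones at its own width.
  assert(APInt::isSameValue(AllOnes64, APInt::getLowBitsSet(128, 64)));
  return 0;
}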
To avoid this, the patch adds a new matcher class that just calls
isAllOnesOrAllOnesSplat.
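For contrast, a small sketch of the width-relative check the fix relies on.
Note that isAllOnesOrAllOnesSplat itself operates on an SDValue and also
handles splat build vectors; this only shows the scalar idea at the APInt
level, where a constant counts as all ones only at its own bit width:

#include "llvm/ADT/APInt.h"
#include <cassert>

int main() {
  using llvm::APInt;
  assert(APInt::getAllOnes(8).isAllOnes());   // i8 -1 is all ones
  assert(APInt::getAllOnes(128).isAllOnes()); // i128 -1 is all ones
  // 64 trailing ones in an i128 are not all ones at width 128.
  assert(!APInt::getLowBitsSet(128, 64).isAllOnes());
  return 0;
}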
Added:
Modified:
llvm/include/llvm/CodeGen/SDPatternMatch.h
llvm/test/CodeGen/X86/subvectorwise-store-of-vector-splat.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/SDPatternMatch.h b/llvm/include/llvm/CodeGen/SDPatternMatch.h
index 4cc7bb9c3b55a9..c581eb7a60aac9 100644
--- a/llvm/include/llvm/CodeGen/SDPatternMatch.h
+++ b/llvm/include/llvm/CodeGen/SDPatternMatch.h
@@ -716,7 +716,17 @@ inline SpecificInt_match m_SpecificInt(uint64_t V) {
inline SpecificInt_match m_Zero() { return m_SpecificInt(0U); }
inline SpecificInt_match m_One() { return m_SpecificInt(1U); }
-inline SpecificInt_match m_AllOnes() { return m_SpecificInt(~0U); }
+
+struct AllOnes_match {
+
+ AllOnes_match() = default;
+
+ template <typename MatchContext> bool match(const MatchContext &, SDValue N) {
+ return isAllOnesOrAllOnesSplat(N);
+ }
+};
+
+inline AllOnes_match m_AllOnes() { return AllOnes_match(); }
/// Match true boolean value based on the information provided by
/// TargetLowering.
@@ -766,7 +776,7 @@ inline BinaryOpc_match<SpecificInt_match, ValTy> m_Neg(const ValTy &V) {
/// Match a Not as a xor(v, -1) or xor(-1, v)
template <typename ValTy>
-inline BinaryOpc_match<ValTy, SpecificInt_match, true> m_Not(const ValTy &V) {
+inline BinaryOpc_match<ValTy, AllOnes_match, true> m_Not(const ValTy &V) {
return m_Xor(V, m_AllOnes());
}
diff --git a/llvm/test/CodeGen/X86/subvectorwise-store-of-vector-splat.ll b/llvm/test/CodeGen/X86/subvectorwise-store-of-vector-splat.ll
index ae66c5420638bc..f1fd05565c47e9 100644
--- a/llvm/test/CodeGen/X86/subvectorwise-store-of-vector-splat.ll
+++ b/llvm/test/CodeGen/X86/subvectorwise-store-of-vector-splat.ll
@@ -2384,52 +2384,45 @@ define void @vec384_v2f64(ptr %in.subvec.ptr, ptr %out.subvec.ptr, ptr %out.vec.
define void @vec384_v3i8(ptr %in.subvec.ptr, ptr %out.subvec.ptr, ptr %out.vec.ptr) nounwind {
; SCALAR-LABEL: vec384_v3i8:
; SCALAR: # %bb.0:
-; SCALAR-NEXT: movl (%rdi), %ecx
-; SCALAR-NEXT: movl %ecx, %eax
-; SCALAR-NEXT: shrl $16, %eax
-; SCALAR-NEXT: movl %ecx, %edi
-; SCALAR-NEXT: shrl $8, %edi
+; SCALAR-NEXT: movl (%rdi), %eax
+; SCALAR-NEXT: movl %eax, %ecx
+; SCALAR-NEXT: shrl $16, %ecx
; SCALAR-NEXT: notb %cl
-; SCALAR-NEXT: movzbl %cl, %r8d
-; SCALAR-NEXT: notb %dil
-; SCALAR-NEXT: movzbl %dil, %ecx
-; SCALAR-NEXT: shll $8, %ecx
-; SCALAR-NEXT: orl %r8d, %ecx
-; SCALAR-NEXT: notb %al
-; SCALAR-NEXT: movb %al, 2(%rsi)
-; SCALAR-NEXT: movw %cx, (%rsi)
-; SCALAR-NEXT: movb %al, 2(%rdx)
-; SCALAR-NEXT: movw %cx, (%rdx)
-; SCALAR-NEXT: movb %al, 6(%rdx)
-; SCALAR-NEXT: movw %cx, 4(%rdx)
-; SCALAR-NEXT: movb %al, 10(%rdx)
-; SCALAR-NEXT: movw %cx, 8(%rdx)
-; SCALAR-NEXT: movb %al, 14(%rdx)
-; SCALAR-NEXT: movw %cx, 12(%rdx)
-; SCALAR-NEXT: movb %al, 18(%rdx)
-; SCALAR-NEXT: movw %cx, 16(%rdx)
-; SCALAR-NEXT: movb %al, 22(%rdx)
-; SCALAR-NEXT: movw %cx, 20(%rdx)
-; SCALAR-NEXT: movb %al, 26(%rdx)
-; SCALAR-NEXT: movw %cx, 24(%rdx)
-; SCALAR-NEXT: movb %al, 30(%rdx)
-; SCALAR-NEXT: movw %cx, 28(%rdx)
-; SCALAR-NEXT: movb %al, 34(%rdx)
-; SCALAR-NEXT: movw %cx, 32(%rdx)
-; SCALAR-NEXT: movb %al, 38(%rdx)
-; SCALAR-NEXT: movw %cx, 36(%rdx)
-; SCALAR-NEXT: movb %al, 42(%rdx)
-; SCALAR-NEXT: movw %cx, 40(%rdx)
-; SCALAR-NEXT: movb %al, 46(%rdx)
-; SCALAR-NEXT: movw %cx, 44(%rdx)
-; SCALAR-NEXT: movb %al, 50(%rdx)
-; SCALAR-NEXT: movw %cx, 48(%rdx)
-; SCALAR-NEXT: movb %al, 54(%rdx)
-; SCALAR-NEXT: movw %cx, 52(%rdx)
-; SCALAR-NEXT: movb %al, 58(%rdx)
-; SCALAR-NEXT: movw %cx, 56(%rdx)
-; SCALAR-NEXT: movb %al, 62(%rdx)
-; SCALAR-NEXT: movw %cx, 60(%rdx)
+; SCALAR-NEXT: notl %eax
+; SCALAR-NEXT: movw %ax, (%rsi)
+; SCALAR-NEXT: movb %cl, 2(%rsi)
+; SCALAR-NEXT: movb %cl, 2(%rdx)
+; SCALAR-NEXT: movw %ax, (%rdx)
+; SCALAR-NEXT: movb %cl, 6(%rdx)
+; SCALAR-NEXT: movw %ax, 4(%rdx)
+; SCALAR-NEXT: movb %cl, 10(%rdx)
+; SCALAR-NEXT: movw %ax, 8(%rdx)
+; SCALAR-NEXT: movb %cl, 14(%rdx)
+; SCALAR-NEXT: movw %ax, 12(%rdx)
+; SCALAR-NEXT: movb %cl, 18(%rdx)
+; SCALAR-NEXT: movw %ax, 16(%rdx)
+; SCALAR-NEXT: movb %cl, 22(%rdx)
+; SCALAR-NEXT: movw %ax, 20(%rdx)
+; SCALAR-NEXT: movb %cl, 26(%rdx)
+; SCALAR-NEXT: movw %ax, 24(%rdx)
+; SCALAR-NEXT: movb %cl, 30(%rdx)
+; SCALAR-NEXT: movw %ax, 28(%rdx)
+; SCALAR-NEXT: movb %cl, 34(%rdx)
+; SCALAR-NEXT: movw %ax, 32(%rdx)
+; SCALAR-NEXT: movb %cl, 38(%rdx)
+; SCALAR-NEXT: movw %ax, 36(%rdx)
+; SCALAR-NEXT: movb %cl, 42(%rdx)
+; SCALAR-NEXT: movw %ax, 40(%rdx)
+; SCALAR-NEXT: movb %cl, 46(%rdx)
+; SCALAR-NEXT: movw %ax, 44(%rdx)
+; SCALAR-NEXT: movb %cl, 50(%rdx)
+; SCALAR-NEXT: movw %ax, 48(%rdx)
+; SCALAR-NEXT: movb %cl, 54(%rdx)
+; SCALAR-NEXT: movw %ax, 52(%rdx)
+; SCALAR-NEXT: movb %cl, 58(%rdx)
+; SCALAR-NEXT: movw %ax, 56(%rdx)
+; SCALAR-NEXT: movb %cl, 62(%rdx)
+; SCALAR-NEXT: movw %ax, 60(%rdx)
; SCALAR-NEXT: retq
;
; SSE2-ONLY-LABEL: vec384_v3i8:
@@ -3784,56 +3777,29 @@ define void @vec384_v4f32(ptr %in.subvec.ptr, ptr %out.subvec.ptr, ptr %out.vec.
define void @vec384_v6i8(ptr %in.subvec.ptr, ptr %out.subvec.ptr, ptr %out.vec.ptr) nounwind {
; SCALAR-LABEL: vec384_v6i8:
; SCALAR: # %bb.0:
-; SCALAR-NEXT: movq (%rdi), %rdi
-; SCALAR-NEXT: movq %rdi, %rax
-; SCALAR-NEXT: shrq $40, %rax
-; SCALAR-NEXT: movq %rdi, %rcx
+; SCALAR-NEXT: movq (%rdi), %rax
+; SCALAR-NEXT: movq %rax, %rcx
; SCALAR-NEXT: shrq $32, %rcx
-; SCALAR-NEXT: movl %edi, %r8d
-; SCALAR-NEXT: shrl $24, %r8d
-; SCALAR-NEXT: movl %edi, %r9d
-; SCALAR-NEXT: shrl $16, %r9d
-; SCALAR-NEXT: movl %edi, %r10d
-; SCALAR-NEXT: shrl $8, %r10d
-; SCALAR-NEXT: notb %dil
-; SCALAR-NEXT: movzbl %dil, %edi
-; SCALAR-NEXT: notb %r10b
-; SCALAR-NEXT: movzbl %r10b, %r10d
-; SCALAR-NEXT: shll $8, %r10d
-; SCALAR-NEXT: orl %edi, %r10d
-; SCALAR-NEXT: notb %r9b
-; SCALAR-NEXT: movzbl %r9b, %edi
-; SCALAR-NEXT: notb %r8b
-; SCALAR-NEXT: movzbl %r8b, %r8d
-; SCALAR-NEXT: shll $8, %r8d
-; SCALAR-NEXT: orl %edi, %r8d
-; SCALAR-NEXT: notb %cl
-; SCALAR-NEXT: movzbl %cl, %ecx
-; SCALAR-NEXT: notb %al
-; SCALAR-NEXT: movzbl %al, %eax
-; SCALAR-NEXT: shll $8, %eax
-; SCALAR-NEXT: orl %ecx, %eax
-; SCALAR-NEXT: movw %ax, 4(%rsi)
-; SCALAR-NEXT: shll $16, %r8d
-; SCALAR-NEXT: movzwl %r10w, %ecx
-; SCALAR-NEXT: orl %r8d, %ecx
-; SCALAR-NEXT: movl %ecx, (%rsi)
-; SCALAR-NEXT: movw %ax, 4(%rdx)
-; SCALAR-NEXT: movl %ecx, (%rdx)
-; SCALAR-NEXT: movw %ax, 12(%rdx)
-; SCALAR-NEXT: movl %ecx, 8(%rdx)
-; SCALAR-NEXT: movw %ax, 20(%rdx)
-; SCALAR-NEXT: movl %ecx, 16(%rdx)
-; SCALAR-NEXT: movw %ax, 28(%rdx)
-; SCALAR-NEXT: movl %ecx, 24(%rdx)
-; SCALAR-NEXT: movw %ax, 36(%rdx)
-; SCALAR-NEXT: movl %ecx, 32(%rdx)
-; SCALAR-NEXT: movw %ax, 44(%rdx)
-; SCALAR-NEXT: movl %ecx, 40(%rdx)
-; SCALAR-NEXT: movw %ax, 52(%rdx)
-; SCALAR-NEXT: movl %ecx, 48(%rdx)
-; SCALAR-NEXT: movw %ax, 60(%rdx)
-; SCALAR-NEXT: movl %ecx, 56(%rdx)
+; SCALAR-NEXT: notl %ecx
+; SCALAR-NEXT: notl %eax
+; SCALAR-NEXT: movl %eax, (%rsi)
+; SCALAR-NEXT: movw %cx, 4(%rsi)
+; SCALAR-NEXT: movw %cx, 4(%rdx)
+; SCALAR-NEXT: movl %eax, (%rdx)
+; SCALAR-NEXT: movw %cx, 12(%rdx)
+; SCALAR-NEXT: movl %eax, 8(%rdx)
+; SCALAR-NEXT: movw %cx, 20(%rdx)
+; SCALAR-NEXT: movl %eax, 16(%rdx)
+; SCALAR-NEXT: movw %cx, 28(%rdx)
+; SCALAR-NEXT: movl %eax, 24(%rdx)
+; SCALAR-NEXT: movw %cx, 36(%rdx)
+; SCALAR-NEXT: movl %eax, 32(%rdx)
+; SCALAR-NEXT: movw %cx, 44(%rdx)
+; SCALAR-NEXT: movl %eax, 40(%rdx)
+; SCALAR-NEXT: movw %cx, 52(%rdx)
+; SCALAR-NEXT: movl %eax, 48(%rdx)
+; SCALAR-NEXT: movw %cx, 60(%rdx)
+; SCALAR-NEXT: movl %eax, 56(%rdx)
; SCALAR-NEXT: retq
;
; SSE2-ONLY-LABEL: vec384_v6i8:
@@ -4062,31 +4028,20 @@ define void @vec384_v6i8(ptr %in.subvec.ptr, ptr %out.subvec.ptr, ptr %out.vec.p
define void @vec384_v6i16(ptr %in.subvec.ptr, ptr %out.subvec.ptr, ptr %out.vec.ptr) nounwind {
; SCALAR-LABEL: vec384_v6i16:
; SCALAR: # %bb.0:
-; SCALAR-NEXT: movl 8(%rdi), %eax
-; SCALAR-NEXT: movq (%rdi), %rcx
-; SCALAR-NEXT: movq %rcx, %rdi
-; SCALAR-NEXT: shrq $32, %rdi
-; SCALAR-NEXT: movq %rcx, %r8
-; SCALAR-NEXT: shrq $48, %r8
-; SCALAR-NEXT: notl %r8d
-; SCALAR-NEXT: shll $16, %r8d
-; SCALAR-NEXT: notl %edi
-; SCALAR-NEXT: movzwl %di, %edi
-; SCALAR-NEXT: orl %r8d, %edi
+; SCALAR-NEXT: movq (%rdi), %rax
+; SCALAR-NEXT: movl 8(%rdi), %ecx
; SCALAR-NEXT: notl %ecx
-; SCALAR-NEXT: notl %eax
-; SCALAR-NEXT: movl %eax, 8(%rsi)
-; SCALAR-NEXT: shlq $32, %rdi
-; SCALAR-NEXT: orq %rdi, %rcx
-; SCALAR-NEXT: movq %rcx, (%rsi)
-; SCALAR-NEXT: movl %eax, 8(%rdx)
-; SCALAR-NEXT: movq %rcx, (%rdx)
-; SCALAR-NEXT: movl %eax, 24(%rdx)
-; SCALAR-NEXT: movq %rcx, 16(%rdx)
-; SCALAR-NEXT: movl %eax, 40(%rdx)
-; SCALAR-NEXT: movq %rcx, 32(%rdx)
-; SCALAR-NEXT: movl %eax, 56(%rdx)
-; SCALAR-NEXT: movq %rcx, 48(%rdx)
+; SCALAR-NEXT: notq %rax
+; SCALAR-NEXT: movq %rax, (%rsi)
+; SCALAR-NEXT: movl %ecx, 8(%rsi)
+; SCALAR-NEXT: movl %ecx, 8(%rdx)
+; SCALAR-NEXT: movq %rax, (%rdx)
+; SCALAR-NEXT: movl %ecx, 24(%rdx)
+; SCALAR-NEXT: movq %rax, 16(%rdx)
+; SCALAR-NEXT: movl %ecx, 40(%rdx)
+; SCALAR-NEXT: movq %rax, 32(%rdx)
+; SCALAR-NEXT: movl %ecx, 56(%rdx)
+; SCALAR-NEXT: movq %rax, 48(%rdx)
; SCALAR-NEXT: retq
;
; SSE2-ONLY-LABEL: vec384_v6i16:
@@ -4579,95 +4534,20 @@ define void @vec384_v8i16(ptr %in.subvec.ptr, ptr %out.subvec.ptr, ptr %out.vec.
define void @vec384_v12i8(ptr %in.subvec.ptr, ptr %out.subvec.ptr, ptr %out.vec.ptr) nounwind {
; SCALAR-LABEL: vec384_v12i8:
; SCALAR: # %bb.0:
-; SCALAR-NEXT: pushq %rbp
-; SCALAR-NEXT: pushq %r15
-; SCALAR-NEXT: pushq %r14
-; SCALAR-NEXT: pushq %r12
-; SCALAR-NEXT: pushq %rbx
-; SCALAR-NEXT: movq (%rdi), %r9
-; SCALAR-NEXT: movq 8(%rdi), %rcx
-; SCALAR-NEXT: movl %ecx, %eax
-; SCALAR-NEXT: shrl $8, %eax
-; SCALAR-NEXT: movl %ecx, %edi
-; SCALAR-NEXT: shrl $24, %edi
-; SCALAR-NEXT: movl %ecx, %r8d
-; SCALAR-NEXT: shrl $16, %r8d
-; SCALAR-NEXT: movq %r9, %r10
-; SCALAR-NEXT: shrq $40, %r10
-; SCALAR-NEXT: movq %r9, %r11
-; SCALAR-NEXT: shrq $32, %r11
-; SCALAR-NEXT: movq %r9, %rbx
-; SCALAR-NEXT: shrq $56, %rbx
-; SCALAR-NEXT: movq %r9, %r14
-; SCALAR-NEXT: shrq $48, %r14
-; SCALAR-NEXT: movl %r9d, %ebp
-; SCALAR-NEXT: shrl $8, %ebp
-; SCALAR-NEXT: movl %r9d, %r15d
-; SCALAR-NEXT: shrl $24, %r15d
-; SCALAR-NEXT: movl %r9d, %r12d
-; SCALAR-NEXT: shrl $16, %r12d
-; SCALAR-NEXT: notb %r12b
-; SCALAR-NEXT: movzbl %r12b, %r12d
-; SCALAR-NEXT: notb %r15b
-; SCALAR-NEXT: movzbl %r15b, %r15d
-; SCALAR-NEXT: shll $8, %r15d
-; SCALAR-NEXT: orl %r12d, %r15d
-; SCALAR-NEXT: shll $16, %r15d
-; SCALAR-NEXT: notb %r9b
-; SCALAR-NEXT: movzbl %r9b, %r9d
-; SCALAR-NEXT: notb %bpl
-; SCALAR-NEXT: movzbl %bpl, %ebp
-; SCALAR-NEXT: shll $8, %ebp
-; SCALAR-NEXT: orl %r9d, %ebp
-; SCALAR-NEXT: movzwl %bp, %r9d
-; SCALAR-NEXT: orl %r15d, %r9d
-; SCALAR-NEXT: notb %r14b
-; SCALAR-NEXT: movzbl %r14b, %ebp
-; SCALAR-NEXT: notb %bl
-; SCALAR-NEXT: movzbl %bl, %ebx
-; SCALAR-NEXT: shll $8, %ebx
-; SCALAR-NEXT: orl %ebp, %ebx
-; SCALAR-NEXT: shll $16, %ebx
-; SCALAR-NEXT: notb %r11b
-; SCALAR-NEXT: movzbl %r11b, %r11d
-; SCALAR-NEXT: notb %r10b
-; SCALAR-NEXT: movzbl %r10b, %r10d
-; SCALAR-NEXT: shll $8, %r10d
-; SCALAR-NEXT: orl %r11d, %r10d
-; SCALAR-NEXT: movzwl %r10w, %r10d
-; SCALAR-NEXT: orl %ebx, %r10d
-; SCALAR-NEXT: notb %r8b
-; SCALAR-NEXT: movzbl %r8b, %r8d
-; SCALAR-NEXT: notb %dil
-; SCALAR-NEXT: movzbl %dil, %edi
-; SCALAR-NEXT: shll $8, %edi
-; SCALAR-NEXT: orl %r8d, %edi
-; SCALAR-NEXT: shll $16, %edi
-; SCALAR-NEXT: notb %cl
-; SCALAR-NEXT: movzbl %cl, %ecx
-; SCALAR-NEXT: notb %al
-; SCALAR-NEXT: movzbl %al, %eax
-; SCALAR-NEXT: shll $8, %eax
-; SCALAR-NEXT: orl %ecx, %eax
-; SCALAR-NEXT: movzwl %ax, %eax
-; SCALAR-NEXT: orl %edi, %eax
-; SCALAR-NEXT: movl %eax, 8(%rsi)
-; SCALAR-NEXT: shlq $32, %r10
-; SCALAR-NEXT: orq %r10, %r9
-; SCALAR-NEXT: movq %r9, (%rsi)
-; SCALAR-NEXT: movl %eax, 8(%rdx)
-; SCALAR-NEXT: movq %r9, (%rdx)
-; SCALAR-NEXT: movl %eax, 24(%rdx)
-; SCALAR-NEXT: movq %r9, 16(%rdx)
-; SCALAR-NEXT: movl %eax, 40(%rdx)
-; SCALAR-NEXT: movq %r9, 32(%rdx)
-; SCALAR-NEXT: movl %eax, 56(%rdx)
-; SCALAR-NEXT: movq %r9, 48(%rdx)
-; SCALAR-NEXT: popq %rbx
-; SCALAR-NEXT: popq %r12
-; SCALAR-NEXT: popq %r14
-; SCALAR-NEXT: popq %r15
-; SCALAR-NEXT: popq %rbp
+; SCALAR-NEXT: movq (%rdi), %rax
+; SCALAR-NEXT: movl 8(%rdi), %ecx
+; SCALAR-NEXT: notl %ecx
+; SCALAR-NEXT: notq %rax
+; SCALAR-NEXT: movq %rax, (%rsi)
+; SCALAR-NEXT: movl %ecx, 8(%rsi)
+; SCALAR-NEXT: movl %ecx, 8(%rdx)
+; SCALAR-NEXT: movq %rax, (%rdx)
+; SCALAR-NEXT: movl %ecx, 24(%rdx)
+; SCALAR-NEXT: movq %rax, 16(%rdx)
+; SCALAR-NEXT: movl %ecx, 40(%rdx)
+; SCALAR-NEXT: movq %rax, 32(%rdx)
+; SCALAR-NEXT: movl %ecx, 56(%rdx)
+; SCALAR-NEXT: movq %rax, 48(%rdx)
; SCALAR-NEXT: retq
;
; SSE2-ONLY-LABEL: vec384_v12i8:
@@ -4785,47 +4665,12 @@ define void @vec384_v12i8(ptr %in.subvec.ptr, ptr %out.subvec.ptr, ptr %out.vec.
define void @vec384_v12i16(ptr %in.subvec.ptr, ptr %out.subvec.ptr, ptr %out.vec.ptr) nounwind {
; SCALAR-LABEL: vec384_v12i16:
; SCALAR: # %bb.0:
-; SCALAR-NEXT: pushq %r14
-; SCALAR-NEXT: pushq %rbx
; SCALAR-NEXT: movq (%rdi), %rax
; SCALAR-NEXT: movq 8(%rdi), %rcx
-; SCALAR-NEXT: movq %rax, %r8
-; SCALAR-NEXT: shrq $32, %r8
-; SCALAR-NEXT: movq %rax, %r9
-; SCALAR-NEXT: shrq $48, %r9
-; SCALAR-NEXT: movq %rcx, %r10
-; SCALAR-NEXT: shrq $32, %r10
-; SCALAR-NEXT: movq %rcx, %r11
-; SCALAR-NEXT: shrq $48, %r11
; SCALAR-NEXT: movq 16(%rdi), %rdi
-; SCALAR-NEXT: movq %rdi, %rbx
-; SCALAR-NEXT: shrq $32, %rbx
-; SCALAR-NEXT: movq %rdi, %r14
-; SCALAR-NEXT: shrq $48, %r14
-; SCALAR-NEXT: notl %r14d
-; SCALAR-NEXT: shll $16, %r14d
-; SCALAR-NEXT: notl %ebx
-; SCALAR-NEXT: movzwl %bx, %ebx
-; SCALAR-NEXT: orl %r14d, %ebx
-; SCALAR-NEXT: shlq $32, %rbx
-; SCALAR-NEXT: notl %edi
-; SCALAR-NEXT: orq %rbx, %rdi
-; SCALAR-NEXT: notl %r11d
-; SCALAR-NEXT: shll $16, %r11d
-; SCALAR-NEXT: notl %r10d
-; SCALAR-NEXT: movzwl %r10w, %r10d
-; SCALAR-NEXT: orl %r11d, %r10d
-; SCALAR-NEXT: shlq $32, %r10
-; SCALAR-NEXT: notl %ecx
-; SCALAR-NEXT: orq %r10, %rcx
-; SCALAR-NEXT: notl %r9d
-; SCALAR-NEXT: shll $16, %r9d
-; SCALAR-NEXT: notl %r8d
-; SCALAR-NEXT: movzwl %r8w, %r8d
-; SCALAR-NEXT: orl %r9d, %r8d
-; SCALAR-NEXT: shlq $32, %r8
-; SCALAR-NEXT: notl %eax
-; SCALAR-NEXT: orq %r8, %rax
+; SCALAR-NEXT: notq %rdi
+; SCALAR-NEXT: notq %rcx
+; SCALAR-NEXT: notq %rax
; SCALAR-NEXT: movq %rax, (%rsi)
; SCALAR-NEXT: movq %rcx, 8(%rsi)
; SCALAR-NEXT: movq %rdi, 16(%rsi)
@@ -4835,8 +4680,6 @@ define void @vec384_v12i16(ptr %in.subvec.ptr, ptr %out.subvec.ptr, ptr %out.vec
; SCALAR-NEXT: movq %rdi, 48(%rdx)
; SCALAR-NEXT: movq %rcx, 40(%rdx)
; SCALAR-NEXT: movq %rax, 32(%rdx)
-; SCALAR-NEXT: popq %rbx
-; SCALAR-NEXT: popq %r14
; SCALAR-NEXT: retq
;
; SSE2-LABEL: vec384_v12i16:
@@ -5085,144 +4928,9 @@ define void @vec384_v24i8(ptr %in.subvec.ptr, ptr %out.subvec.ptr, ptr %out.vec.
; SCALAR-NEXT: movq (%rdi), %rax
; SCALAR-NEXT: movq 8(%rdi), %rcx
; SCALAR-NEXT: movq 16(%rdi), %rdi
-; SCALAR-NEXT: movq %rdi, %r8
-; SCALAR-NEXT: shrq $40, %r8
-; SCALAR-NEXT: movq %rdi, %r9
-; SCALAR-NEXT: shrq $56, %r9
-; SCALAR-NEXT: movq %rdi, %r10
-; SCALAR-NEXT: shrq $48, %r10
-; SCALAR-NEXT: notb %r10b
-; SCALAR-NEXT: movzbl %r10b, %r10d
-; SCALAR-NEXT: notb %r9b
-; SCALAR-NEXT: movzbl %r9b, %r9d
-; SCALAR-NEXT: shll $8, %r9d
-; SCALAR-NEXT: orl %r10d, %r9d
-; SCALAR-NEXT: movq %rdi, %r10
-; SCALAR-NEXT: shrq $32, %r10
-; SCALAR-NEXT: notb %r10b
-; SCALAR-NEXT: movzbl %r10b, %r10d
-; SCALAR-NEXT: notb %r8b
-; SCALAR-NEXT: movzbl %r8b, %r8d
-; SCALAR-NEXT: shll $8, %r8d
-; SCALAR-NEXT: orl %r10d, %r8d
-; SCALAR-NEXT: movl %edi, %r10d
-; SCALAR-NEXT: shrl $24, %r10d
-; SCALAR-NEXT: shll $16, %r9d
-; SCALAR-NEXT: movzwl %r8w, %r8d
-; SCALAR-NEXT: orl %r9d, %r8d
-; SCALAR-NEXT: movl %edi, %r9d
-; SCALAR-NEXT: shrl $16, %r9d
-; SCALAR-NEXT: notb %r9b
-; SCALAR-NEXT: movzbl %r9b, %r9d
-; SCALAR-NEXT: notb %r10b
-; SCALAR-NEXT: movzbl %r10b, %r10d
-; SCALAR-NEXT: shll $8, %r10d
-; SCALAR-NEXT: orl %r9d, %r10d
-; SCALAR-NEXT: movl %edi, %r9d
-; SCALAR-NEXT: shrl $8, %r9d
-; SCALAR-NEXT: notb %dil
-; SCALAR-NEXT: movzbl %dil, %edi
-; SCALAR-NEXT: notb %r9b
-; SCALAR-NEXT: movzbl %r9b, %r11d
-; SCALAR-NEXT: shll $8, %r11d
-; SCALAR-NEXT: orl %edi, %r11d
-; SCALAR-NEXT: movq %rcx, %r9
-; SCALAR-NEXT: shrq $40, %r9
-; SCALAR-NEXT: shll $16, %r10d
-; SCALAR-NEXT: movzwl %r11w, %edi
-; SCALAR-NEXT: orl %r10d, %edi
-; SCALAR-NEXT: movq %rcx, %r10
-; SCALAR-NEXT: shrq $56, %r10
-; SCALAR-NEXT: shlq $32, %r8
-; SCALAR-NEXT: orq %r8, %rdi
-; SCALAR-NEXT: movq %rcx, %r8
-; SCALAR-NEXT: shrq $48, %r8
-; SCALAR-NEXT: notb %r8b
-; SCALAR-NEXT: movzbl %r8b, %r8d
-; SCALAR-NEXT: notb %r10b
-; SCALAR-NEXT: movzbl %r10b, %r10d
-; SCALAR-NEXT: shll $8, %r10d
-; SCALAR-NEXT: orl %r8d, %r10d
-; SCALAR-NEXT: movq %rcx, %r8
-; SCALAR-NEXT: shrq $32, %r8
-; SCALAR-NEXT: notb %r8b
-; SCALAR-NEXT: movzbl %r8b, %r8d
-; SCALAR-NEXT: notb %r9b
-; SCALAR-NEXT: movzbl %r9b, %r9d
-; SCALAR-NEXT: shll $8, %r9d
-; SCALAR-NEXT: orl %r8d, %r9d
-; SCALAR-NEXT: movl %ecx, %r11d
-; SCALAR-NEXT: shrl $24, %r11d
-; SCALAR-NEXT: shll $16, %r10d
-; SCALAR-NEXT: movzwl %r9w, %r8d
-; SCALAR-NEXT: orl %r10d, %r8d
-; SCALAR-NEXT: movl %ecx, %r9d
-; SCALAR-NEXT: shrl $16, %r9d
-; SCALAR-NEXT: notb %r9b
-; SCALAR-NEXT: movzbl %r9b, %r9d
-; SCALAR-NEXT: notb %r11b
-; SCALAR-NEXT: movzbl %r11b, %r10d
-; SCALAR-NEXT: shll $8, %r10d
-; SCALAR-NEXT: orl %r9d, %r10d
-; SCALAR-NEXT: movl %ecx, %r9d
-; SCALAR-NEXT: shrl $8, %r9d
-; SCALAR-NEXT: notb %cl
-; SCALAR-NEXT: movzbl %cl, %ecx
-; SCALAR-NEXT: notb %r9b
-; SCALAR-NEXT: movzbl %r9b, %r11d
-; SCALAR-NEXT: shll $8, %r11d
-; SCALAR-NEXT: orl %ecx, %r11d
-; SCALAR-NEXT: movq %rax, %r9
-; SCALAR-NEXT: shrq $40, %r9
-; SCALAR-NEXT: shll $16, %r10d
-; SCALAR-NEXT: movzwl %r11w, %ecx
-; SCALAR-NEXT: orl %r10d, %ecx
-; SCALAR-NEXT: movq %rax, %r10
-; SCALAR-NEXT: shrq $56, %r10
-; SCALAR-NEXT: shlq $32, %r8
-; SCALAR-NEXT: orq %r8, %rcx
-; SCALAR-NEXT: movq %rax, %r8
-; SCALAR-NEXT: shrq $48, %r8
-; SCALAR-NEXT: notb %r8b
-; SCALAR-NEXT: movzbl %r8b, %r8d
-; SCALAR-NEXT: notb %r10b
-; SCALAR-NEXT: movzbl %r10b, %r10d
-; SCALAR-NEXT: shll $8, %r10d
-; SCALAR-NEXT: orl %r8d, %r10d
-; SCALAR-NEXT: movq %rax, %r8
-; SCALAR-NEXT: shrq $32, %r8
-; SCALAR-NEXT: notb %r8b
-; SCALAR-NEXT: movzbl %r8b, %r8d
-; SCALAR-NEXT: notb %r9b
-; SCALAR-NEXT: movzbl %r9b, %r9d
-; SCALAR-NEXT: shll $8, %r9d
-; SCALAR-NEXT: orl %r8d, %r9d
-; SCALAR-NEXT: movl %eax, %r11d
-; SCALAR-NEXT: shrl $24, %r11d
-; SCALAR-NEXT: shll $16, %r10d
-; SCALAR-NEXT: movzwl %r9w, %r8d
-; SCALAR-NEXT: orl %r10d, %r8d
-; SCALAR-NEXT: movl %eax, %r9d
-; SCALAR-NEXT: shrl $16, %r9d
-; SCALAR-NEXT: notb %r9b
-; SCALAR-NEXT: movzbl %r9b, %r9d
-; SCALAR-NEXT: notb %r11b
-; SCALAR-NEXT: movzbl %r11b, %r10d
-; SCALAR-NEXT: shll $8, %r10d
-; SCALAR-NEXT: orl %r9d, %r10d
-; SCALAR-NEXT: movl %eax, %r9d
-; SCALAR-NEXT: shrl $8, %r9d
-; SCALAR-NEXT: notb %al
-; SCALAR-NEXT: movzbl %al, %eax
-; SCALAR-NEXT: notb %r9b
-; SCALAR-NEXT: movzbl %r9b, %r9d
-; SCALAR-NEXT: shll $8, %r9d
-; SCALAR-NEXT: orl %eax, %r9d
-; SCALAR-NEXT: shll $16, %r10d
-; SCALAR-NEXT: movzwl %r9w, %eax
-; SCALAR-NEXT: orl %r10d, %eax
-; SCALAR-NEXT: shlq $32, %r8
-; SCALAR-NEXT: orq %r8, %rax
+; SCALAR-NEXT: notq %rdi
+; SCALAR-NEXT: notq %rcx
+; SCALAR-NEXT: notq %rax
; SCALAR-NEXT: movq %rax, (%rsi)
; SCALAR-NEXT: movq %rcx, 8(%rsi)
; SCALAR-NEXT: movq %rdi, 16(%rsi)