[llvm] [SelectionDAG] Correct the implementation of m_AllOnes. (PR #90776)

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Wed May 1 13:41:46 PDT 2024


https://github.com/topperc created https://github.com/llvm/llvm-project/pull/90776

Previously we used SpecificInt_match, which created a 64-bit APInt containing all ones. This was then compared against other constants using APInt::isSameValue.

If the constants have different bit widths, APInt::isSameValue zero extends the narrower one to make the widths match. This means that for any constant narrower than 64 bits, m_AllOnes was guaranteed to fail, since the zero-extended value would no longer be all ones.

I think this would also incorrectly consider an i128 with 64 leading zeros and 64 trailing ones as matching m_AllOnes, since zero extending the 64-bit all-ones pattern to 128 bits yields exactly that value.

To avoid this, the patch adds a new matcher class that simply calls isAllOnesOrAllOnesSplat.
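
For illustration, here is a minimal standalone sketch (assuming an LLVM build with llvm/ADT/APInt.h on the include path) of the isSameValue behavior described above; the names Pattern, AllOnesI32, and LowHalfOnes are mine, not from the patch:

    #include "llvm/ADT/APInt.h"
    #include <cassert>

    using llvm::APInt;

    int main() {
      // The pattern the old m_AllOnes effectively compared against, per the
      // description above: a 64-bit APInt with all bits set.
      APInt Pattern = APInt::getAllOnes(64);

      // An all-ones i32 constant as it would appear in the DAG.
      APInt AllOnesI32 = APInt::getAllOnes(32);

      // isSameValue zero extends AllOnesI32 to 0x00000000FFFFFFFF, which is
      // not equal to the 64-bit all-ones pattern, so the old match failed.
      assert(!APInt::isSameValue(Pattern, AllOnesI32));

      // Conversely, an i128 with only its low 64 bits set (64 leading zeros,
      // 64 trailing ones) compares equal after zero extension, so the old
      // matcher would have accepted a value that is not all ones.
      APInt LowHalfOnes = APInt::getLowBitsSet(128, 64);
      assert(APInt::isSameValue(Pattern, LowHalfOnes));
      return 0;
    }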

From 6de6c69e8a2000126ca9dcc98fda4f610524d5de Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Wed, 1 May 2024 13:35:39 -0700
Subject: [PATCH] [SelectionDAG] Correct the implementation of m_AllOnes.

Previously we used SpecificInt_match, which created a 64-bit APInt
containing all ones. This was then compared against other constants
using APInt::isSameValue.

If the constants have different bit widths, APInt::isSameValue zero
extends the narrower one to make the widths match. This means that
for any constant narrower than 64 bits, m_AllOnes was guaranteed to
fail, since the zero-extended value would no longer be all ones.

I think this would also incorrectly consider an i128 with 64 leading
zeros and 64 trailing ones as matching m_AllOnes.

To avoid this, the patch adds a new matcher class that simply calls
isAllOnesOrAllOnesSplat.
---
 llvm/include/llvm/CodeGen/SDPatternMatch.h    |  14 +-
 .../subvectorwise-store-of-vector-splat.ll    | 478 ++++--------------
 2 files changed, 105 insertions(+), 387 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/SDPatternMatch.h b/llvm/include/llvm/CodeGen/SDPatternMatch.h
index 4cc7bb9c3b55a9..c581eb7a60aac9 100644
--- a/llvm/include/llvm/CodeGen/SDPatternMatch.h
+++ b/llvm/include/llvm/CodeGen/SDPatternMatch.h
@@ -716,7 +716,17 @@ inline SpecificInt_match m_SpecificInt(uint64_t V) {
 
 inline SpecificInt_match m_Zero() { return m_SpecificInt(0U); }
 inline SpecificInt_match m_One() { return m_SpecificInt(1U); }
-inline SpecificInt_match m_AllOnes() { return m_SpecificInt(~0U); }
+
+struct AllOnes_match {
+
+  AllOnes_match() = default;
+
+  template <typename MatchContext> bool match(const MatchContext &, SDValue N) {
+    return isAllOnesOrAllOnesSplat(N);
+  }
+};
+
+inline AllOnes_match m_AllOnes() { return AllOnes_match(); }
 
 /// Match true boolean value based on the information provided by
 /// TargetLowering.
@@ -766,7 +776,7 @@ inline BinaryOpc_match<SpecificInt_match, ValTy> m_Neg(const ValTy &V) {
 
 /// Match a Not as a xor(v, -1) or xor(-1, v)
 template <typename ValTy>
-inline BinaryOpc_match<ValTy, SpecificInt_match, true> m_Not(const ValTy &V) {
+inline BinaryOpc_match<ValTy, AllOnes_match, true> m_Not(const ValTy &V) {
   return m_Xor(V, m_AllOnes());
 }
 
diff --git a/llvm/test/CodeGen/X86/subvectorwise-store-of-vector-splat.ll b/llvm/test/CodeGen/X86/subvectorwise-store-of-vector-splat.ll
index ae66c5420638bc..f1fd05565c47e9 100644
--- a/llvm/test/CodeGen/X86/subvectorwise-store-of-vector-splat.ll
+++ b/llvm/test/CodeGen/X86/subvectorwise-store-of-vector-splat.ll
@@ -2384,52 +2384,45 @@ define void @vec384_v2f64(ptr %in.subvec.ptr, ptr %out.subvec.ptr, ptr %out.vec.
 define void @vec384_v3i8(ptr %in.subvec.ptr, ptr %out.subvec.ptr, ptr %out.vec.ptr) nounwind {
 ; SCALAR-LABEL: vec384_v3i8:
 ; SCALAR:       # %bb.0:
-; SCALAR-NEXT:    movl (%rdi), %ecx
-; SCALAR-NEXT:    movl %ecx, %eax
-; SCALAR-NEXT:    shrl $16, %eax
-; SCALAR-NEXT:    movl %ecx, %edi
-; SCALAR-NEXT:    shrl $8, %edi
+; SCALAR-NEXT:    movl (%rdi), %eax
+; SCALAR-NEXT:    movl %eax, %ecx
+; SCALAR-NEXT:    shrl $16, %ecx
 ; SCALAR-NEXT:    notb %cl
-; SCALAR-NEXT:    movzbl %cl, %r8d
-; SCALAR-NEXT:    notb %dil
-; SCALAR-NEXT:    movzbl %dil, %ecx
-; SCALAR-NEXT:    shll $8, %ecx
-; SCALAR-NEXT:    orl %r8d, %ecx
-; SCALAR-NEXT:    notb %al
-; SCALAR-NEXT:    movb %al, 2(%rsi)
-; SCALAR-NEXT:    movw %cx, (%rsi)
-; SCALAR-NEXT:    movb %al, 2(%rdx)
-; SCALAR-NEXT:    movw %cx, (%rdx)
-; SCALAR-NEXT:    movb %al, 6(%rdx)
-; SCALAR-NEXT:    movw %cx, 4(%rdx)
-; SCALAR-NEXT:    movb %al, 10(%rdx)
-; SCALAR-NEXT:    movw %cx, 8(%rdx)
-; SCALAR-NEXT:    movb %al, 14(%rdx)
-; SCALAR-NEXT:    movw %cx, 12(%rdx)
-; SCALAR-NEXT:    movb %al, 18(%rdx)
-; SCALAR-NEXT:    movw %cx, 16(%rdx)
-; SCALAR-NEXT:    movb %al, 22(%rdx)
-; SCALAR-NEXT:    movw %cx, 20(%rdx)
-; SCALAR-NEXT:    movb %al, 26(%rdx)
-; SCALAR-NEXT:    movw %cx, 24(%rdx)
-; SCALAR-NEXT:    movb %al, 30(%rdx)
-; SCALAR-NEXT:    movw %cx, 28(%rdx)
-; SCALAR-NEXT:    movb %al, 34(%rdx)
-; SCALAR-NEXT:    movw %cx, 32(%rdx)
-; SCALAR-NEXT:    movb %al, 38(%rdx)
-; SCALAR-NEXT:    movw %cx, 36(%rdx)
-; SCALAR-NEXT:    movb %al, 42(%rdx)
-; SCALAR-NEXT:    movw %cx, 40(%rdx)
-; SCALAR-NEXT:    movb %al, 46(%rdx)
-; SCALAR-NEXT:    movw %cx, 44(%rdx)
-; SCALAR-NEXT:    movb %al, 50(%rdx)
-; SCALAR-NEXT:    movw %cx, 48(%rdx)
-; SCALAR-NEXT:    movb %al, 54(%rdx)
-; SCALAR-NEXT:    movw %cx, 52(%rdx)
-; SCALAR-NEXT:    movb %al, 58(%rdx)
-; SCALAR-NEXT:    movw %cx, 56(%rdx)
-; SCALAR-NEXT:    movb %al, 62(%rdx)
-; SCALAR-NEXT:    movw %cx, 60(%rdx)
+; SCALAR-NEXT:    notl %eax
+; SCALAR-NEXT:    movw %ax, (%rsi)
+; SCALAR-NEXT:    movb %cl, 2(%rsi)
+; SCALAR-NEXT:    movb %cl, 2(%rdx)
+; SCALAR-NEXT:    movw %ax, (%rdx)
+; SCALAR-NEXT:    movb %cl, 6(%rdx)
+; SCALAR-NEXT:    movw %ax, 4(%rdx)
+; SCALAR-NEXT:    movb %cl, 10(%rdx)
+; SCALAR-NEXT:    movw %ax, 8(%rdx)
+; SCALAR-NEXT:    movb %cl, 14(%rdx)
+; SCALAR-NEXT:    movw %ax, 12(%rdx)
+; SCALAR-NEXT:    movb %cl, 18(%rdx)
+; SCALAR-NEXT:    movw %ax, 16(%rdx)
+; SCALAR-NEXT:    movb %cl, 22(%rdx)
+; SCALAR-NEXT:    movw %ax, 20(%rdx)
+; SCALAR-NEXT:    movb %cl, 26(%rdx)
+; SCALAR-NEXT:    movw %ax, 24(%rdx)
+; SCALAR-NEXT:    movb %cl, 30(%rdx)
+; SCALAR-NEXT:    movw %ax, 28(%rdx)
+; SCALAR-NEXT:    movb %cl, 34(%rdx)
+; SCALAR-NEXT:    movw %ax, 32(%rdx)
+; SCALAR-NEXT:    movb %cl, 38(%rdx)
+; SCALAR-NEXT:    movw %ax, 36(%rdx)
+; SCALAR-NEXT:    movb %cl, 42(%rdx)
+; SCALAR-NEXT:    movw %ax, 40(%rdx)
+; SCALAR-NEXT:    movb %cl, 46(%rdx)
+; SCALAR-NEXT:    movw %ax, 44(%rdx)
+; SCALAR-NEXT:    movb %cl, 50(%rdx)
+; SCALAR-NEXT:    movw %ax, 48(%rdx)
+; SCALAR-NEXT:    movb %cl, 54(%rdx)
+; SCALAR-NEXT:    movw %ax, 52(%rdx)
+; SCALAR-NEXT:    movb %cl, 58(%rdx)
+; SCALAR-NEXT:    movw %ax, 56(%rdx)
+; SCALAR-NEXT:    movb %cl, 62(%rdx)
+; SCALAR-NEXT:    movw %ax, 60(%rdx)
 ; SCALAR-NEXT:    retq
 ;
 ; SSE2-ONLY-LABEL: vec384_v3i8:
@@ -3784,56 +3777,29 @@ define void @vec384_v4f32(ptr %in.subvec.ptr, ptr %out.subvec.ptr, ptr %out.vec.
 define void @vec384_v6i8(ptr %in.subvec.ptr, ptr %out.subvec.ptr, ptr %out.vec.ptr) nounwind {
 ; SCALAR-LABEL: vec384_v6i8:
 ; SCALAR:       # %bb.0:
-; SCALAR-NEXT:    movq (%rdi), %rdi
-; SCALAR-NEXT:    movq %rdi, %rax
-; SCALAR-NEXT:    shrq $40, %rax
-; SCALAR-NEXT:    movq %rdi, %rcx
+; SCALAR-NEXT:    movq (%rdi), %rax
+; SCALAR-NEXT:    movq %rax, %rcx
 ; SCALAR-NEXT:    shrq $32, %rcx
-; SCALAR-NEXT:    movl %edi, %r8d
-; SCALAR-NEXT:    shrl $24, %r8d
-; SCALAR-NEXT:    movl %edi, %r9d
-; SCALAR-NEXT:    shrl $16, %r9d
-; SCALAR-NEXT:    movl %edi, %r10d
-; SCALAR-NEXT:    shrl $8, %r10d
-; SCALAR-NEXT:    notb %dil
-; SCALAR-NEXT:    movzbl %dil, %edi
-; SCALAR-NEXT:    notb %r10b
-; SCALAR-NEXT:    movzbl %r10b, %r10d
-; SCALAR-NEXT:    shll $8, %r10d
-; SCALAR-NEXT:    orl %edi, %r10d
-; SCALAR-NEXT:    notb %r9b
-; SCALAR-NEXT:    movzbl %r9b, %edi
-; SCALAR-NEXT:    notb %r8b
-; SCALAR-NEXT:    movzbl %r8b, %r8d
-; SCALAR-NEXT:    shll $8, %r8d
-; SCALAR-NEXT:    orl %edi, %r8d
-; SCALAR-NEXT:    notb %cl
-; SCALAR-NEXT:    movzbl %cl, %ecx
-; SCALAR-NEXT:    notb %al
-; SCALAR-NEXT:    movzbl %al, %eax
-; SCALAR-NEXT:    shll $8, %eax
-; SCALAR-NEXT:    orl %ecx, %eax
-; SCALAR-NEXT:    movw %ax, 4(%rsi)
-; SCALAR-NEXT:    shll $16, %r8d
-; SCALAR-NEXT:    movzwl %r10w, %ecx
-; SCALAR-NEXT:    orl %r8d, %ecx
-; SCALAR-NEXT:    movl %ecx, (%rsi)
-; SCALAR-NEXT:    movw %ax, 4(%rdx)
-; SCALAR-NEXT:    movl %ecx, (%rdx)
-; SCALAR-NEXT:    movw %ax, 12(%rdx)
-; SCALAR-NEXT:    movl %ecx, 8(%rdx)
-; SCALAR-NEXT:    movw %ax, 20(%rdx)
-; SCALAR-NEXT:    movl %ecx, 16(%rdx)
-; SCALAR-NEXT:    movw %ax, 28(%rdx)
-; SCALAR-NEXT:    movl %ecx, 24(%rdx)
-; SCALAR-NEXT:    movw %ax, 36(%rdx)
-; SCALAR-NEXT:    movl %ecx, 32(%rdx)
-; SCALAR-NEXT:    movw %ax, 44(%rdx)
-; SCALAR-NEXT:    movl %ecx, 40(%rdx)
-; SCALAR-NEXT:    movw %ax, 52(%rdx)
-; SCALAR-NEXT:    movl %ecx, 48(%rdx)
-; SCALAR-NEXT:    movw %ax, 60(%rdx)
-; SCALAR-NEXT:    movl %ecx, 56(%rdx)
+; SCALAR-NEXT:    notl %ecx
+; SCALAR-NEXT:    notl %eax
+; SCALAR-NEXT:    movl %eax, (%rsi)
+; SCALAR-NEXT:    movw %cx, 4(%rsi)
+; SCALAR-NEXT:    movw %cx, 4(%rdx)
+; SCALAR-NEXT:    movl %eax, (%rdx)
+; SCALAR-NEXT:    movw %cx, 12(%rdx)
+; SCALAR-NEXT:    movl %eax, 8(%rdx)
+; SCALAR-NEXT:    movw %cx, 20(%rdx)
+; SCALAR-NEXT:    movl %eax, 16(%rdx)
+; SCALAR-NEXT:    movw %cx, 28(%rdx)
+; SCALAR-NEXT:    movl %eax, 24(%rdx)
+; SCALAR-NEXT:    movw %cx, 36(%rdx)
+; SCALAR-NEXT:    movl %eax, 32(%rdx)
+; SCALAR-NEXT:    movw %cx, 44(%rdx)
+; SCALAR-NEXT:    movl %eax, 40(%rdx)
+; SCALAR-NEXT:    movw %cx, 52(%rdx)
+; SCALAR-NEXT:    movl %eax, 48(%rdx)
+; SCALAR-NEXT:    movw %cx, 60(%rdx)
+; SCALAR-NEXT:    movl %eax, 56(%rdx)
 ; SCALAR-NEXT:    retq
 ;
 ; SSE2-ONLY-LABEL: vec384_v6i8:
@@ -4062,31 +4028,20 @@ define void @vec384_v6i8(ptr %in.subvec.ptr, ptr %out.subvec.ptr, ptr %out.vec.p
 define void @vec384_v6i16(ptr %in.subvec.ptr, ptr %out.subvec.ptr, ptr %out.vec.ptr) nounwind {
 ; SCALAR-LABEL: vec384_v6i16:
 ; SCALAR:       # %bb.0:
-; SCALAR-NEXT:    movl 8(%rdi), %eax
-; SCALAR-NEXT:    movq (%rdi), %rcx
-; SCALAR-NEXT:    movq %rcx, %rdi
-; SCALAR-NEXT:    shrq $32, %rdi
-; SCALAR-NEXT:    movq %rcx, %r8
-; SCALAR-NEXT:    shrq $48, %r8
-; SCALAR-NEXT:    notl %r8d
-; SCALAR-NEXT:    shll $16, %r8d
-; SCALAR-NEXT:    notl %edi
-; SCALAR-NEXT:    movzwl %di, %edi
-; SCALAR-NEXT:    orl %r8d, %edi
+; SCALAR-NEXT:    movq (%rdi), %rax
+; SCALAR-NEXT:    movl 8(%rdi), %ecx
 ; SCALAR-NEXT:    notl %ecx
-; SCALAR-NEXT:    notl %eax
-; SCALAR-NEXT:    movl %eax, 8(%rsi)
-; SCALAR-NEXT:    shlq $32, %rdi
-; SCALAR-NEXT:    orq %rdi, %rcx
-; SCALAR-NEXT:    movq %rcx, (%rsi)
-; SCALAR-NEXT:    movl %eax, 8(%rdx)
-; SCALAR-NEXT:    movq %rcx, (%rdx)
-; SCALAR-NEXT:    movl %eax, 24(%rdx)
-; SCALAR-NEXT:    movq %rcx, 16(%rdx)
-; SCALAR-NEXT:    movl %eax, 40(%rdx)
-; SCALAR-NEXT:    movq %rcx, 32(%rdx)
-; SCALAR-NEXT:    movl %eax, 56(%rdx)
-; SCALAR-NEXT:    movq %rcx, 48(%rdx)
+; SCALAR-NEXT:    notq %rax
+; SCALAR-NEXT:    movq %rax, (%rsi)
+; SCALAR-NEXT:    movl %ecx, 8(%rsi)
+; SCALAR-NEXT:    movl %ecx, 8(%rdx)
+; SCALAR-NEXT:    movq %rax, (%rdx)
+; SCALAR-NEXT:    movl %ecx, 24(%rdx)
+; SCALAR-NEXT:    movq %rax, 16(%rdx)
+; SCALAR-NEXT:    movl %ecx, 40(%rdx)
+; SCALAR-NEXT:    movq %rax, 32(%rdx)
+; SCALAR-NEXT:    movl %ecx, 56(%rdx)
+; SCALAR-NEXT:    movq %rax, 48(%rdx)
 ; SCALAR-NEXT:    retq
 ;
 ; SSE2-ONLY-LABEL: vec384_v6i16:
@@ -4579,95 +4534,20 @@ define void @vec384_v8i16(ptr %in.subvec.ptr, ptr %out.subvec.ptr, ptr %out.vec.
 define void @vec384_v12i8(ptr %in.subvec.ptr, ptr %out.subvec.ptr, ptr %out.vec.ptr) nounwind {
 ; SCALAR-LABEL: vec384_v12i8:
 ; SCALAR:       # %bb.0:
-; SCALAR-NEXT:    pushq %rbp
-; SCALAR-NEXT:    pushq %r15
-; SCALAR-NEXT:    pushq %r14
-; SCALAR-NEXT:    pushq %r12
-; SCALAR-NEXT:    pushq %rbx
-; SCALAR-NEXT:    movq (%rdi), %r9
-; SCALAR-NEXT:    movq 8(%rdi), %rcx
-; SCALAR-NEXT:    movl %ecx, %eax
-; SCALAR-NEXT:    shrl $8, %eax
-; SCALAR-NEXT:    movl %ecx, %edi
-; SCALAR-NEXT:    shrl $24, %edi
-; SCALAR-NEXT:    movl %ecx, %r8d
-; SCALAR-NEXT:    shrl $16, %r8d
-; SCALAR-NEXT:    movq %r9, %r10
-; SCALAR-NEXT:    shrq $40, %r10
-; SCALAR-NEXT:    movq %r9, %r11
-; SCALAR-NEXT:    shrq $32, %r11
-; SCALAR-NEXT:    movq %r9, %rbx
-; SCALAR-NEXT:    shrq $56, %rbx
-; SCALAR-NEXT:    movq %r9, %r14
-; SCALAR-NEXT:    shrq $48, %r14
-; SCALAR-NEXT:    movl %r9d, %ebp
-; SCALAR-NEXT:    shrl $8, %ebp
-; SCALAR-NEXT:    movl %r9d, %r15d
-; SCALAR-NEXT:    shrl $24, %r15d
-; SCALAR-NEXT:    movl %r9d, %r12d
-; SCALAR-NEXT:    shrl $16, %r12d
-; SCALAR-NEXT:    notb %r12b
-; SCALAR-NEXT:    movzbl %r12b, %r12d
-; SCALAR-NEXT:    notb %r15b
-; SCALAR-NEXT:    movzbl %r15b, %r15d
-; SCALAR-NEXT:    shll $8, %r15d
-; SCALAR-NEXT:    orl %r12d, %r15d
-; SCALAR-NEXT:    shll $16, %r15d
-; SCALAR-NEXT:    notb %r9b
-; SCALAR-NEXT:    movzbl %r9b, %r9d
-; SCALAR-NEXT:    notb %bpl
-; SCALAR-NEXT:    movzbl %bpl, %ebp
-; SCALAR-NEXT:    shll $8, %ebp
-; SCALAR-NEXT:    orl %r9d, %ebp
-; SCALAR-NEXT:    movzwl %bp, %r9d
-; SCALAR-NEXT:    orl %r15d, %r9d
-; SCALAR-NEXT:    notb %r14b
-; SCALAR-NEXT:    movzbl %r14b, %ebp
-; SCALAR-NEXT:    notb %bl
-; SCALAR-NEXT:    movzbl %bl, %ebx
-; SCALAR-NEXT:    shll $8, %ebx
-; SCALAR-NEXT:    orl %ebp, %ebx
-; SCALAR-NEXT:    shll $16, %ebx
-; SCALAR-NEXT:    notb %r11b
-; SCALAR-NEXT:    movzbl %r11b, %r11d
-; SCALAR-NEXT:    notb %r10b
-; SCALAR-NEXT:    movzbl %r10b, %r10d
-; SCALAR-NEXT:    shll $8, %r10d
-; SCALAR-NEXT:    orl %r11d, %r10d
-; SCALAR-NEXT:    movzwl %r10w, %r10d
-; SCALAR-NEXT:    orl %ebx, %r10d
-; SCALAR-NEXT:    notb %r8b
-; SCALAR-NEXT:    movzbl %r8b, %r8d
-; SCALAR-NEXT:    notb %dil
-; SCALAR-NEXT:    movzbl %dil, %edi
-; SCALAR-NEXT:    shll $8, %edi
-; SCALAR-NEXT:    orl %r8d, %edi
-; SCALAR-NEXT:    shll $16, %edi
-; SCALAR-NEXT:    notb %cl
-; SCALAR-NEXT:    movzbl %cl, %ecx
-; SCALAR-NEXT:    notb %al
-; SCALAR-NEXT:    movzbl %al, %eax
-; SCALAR-NEXT:    shll $8, %eax
-; SCALAR-NEXT:    orl %ecx, %eax
-; SCALAR-NEXT:    movzwl %ax, %eax
-; SCALAR-NEXT:    orl %edi, %eax
-; SCALAR-NEXT:    movl %eax, 8(%rsi)
-; SCALAR-NEXT:    shlq $32, %r10
-; SCALAR-NEXT:    orq %r10, %r9
-; SCALAR-NEXT:    movq %r9, (%rsi)
-; SCALAR-NEXT:    movl %eax, 8(%rdx)
-; SCALAR-NEXT:    movq %r9, (%rdx)
-; SCALAR-NEXT:    movl %eax, 24(%rdx)
-; SCALAR-NEXT:    movq %r9, 16(%rdx)
-; SCALAR-NEXT:    movl %eax, 40(%rdx)
-; SCALAR-NEXT:    movq %r9, 32(%rdx)
-; SCALAR-NEXT:    movl %eax, 56(%rdx)
-; SCALAR-NEXT:    movq %r9, 48(%rdx)
-; SCALAR-NEXT:    popq %rbx
-; SCALAR-NEXT:    popq %r12
-; SCALAR-NEXT:    popq %r14
-; SCALAR-NEXT:    popq %r15
-; SCALAR-NEXT:    popq %rbp
+; SCALAR-NEXT:    movq (%rdi), %rax
+; SCALAR-NEXT:    movl 8(%rdi), %ecx
+; SCALAR-NEXT:    notl %ecx
+; SCALAR-NEXT:    notq %rax
+; SCALAR-NEXT:    movq %rax, (%rsi)
+; SCALAR-NEXT:    movl %ecx, 8(%rsi)
+; SCALAR-NEXT:    movl %ecx, 8(%rdx)
+; SCALAR-NEXT:    movq %rax, (%rdx)
+; SCALAR-NEXT:    movl %ecx, 24(%rdx)
+; SCALAR-NEXT:    movq %rax, 16(%rdx)
+; SCALAR-NEXT:    movl %ecx, 40(%rdx)
+; SCALAR-NEXT:    movq %rax, 32(%rdx)
+; SCALAR-NEXT:    movl %ecx, 56(%rdx)
+; SCALAR-NEXT:    movq %rax, 48(%rdx)
 ; SCALAR-NEXT:    retq
 ;
 ; SSE2-ONLY-LABEL: vec384_v12i8:
@@ -4785,47 +4665,12 @@ define void @vec384_v12i8(ptr %in.subvec.ptr, ptr %out.subvec.ptr, ptr %out.vec.
 define void @vec384_v12i16(ptr %in.subvec.ptr, ptr %out.subvec.ptr, ptr %out.vec.ptr) nounwind {
 ; SCALAR-LABEL: vec384_v12i16:
 ; SCALAR:       # %bb.0:
-; SCALAR-NEXT:    pushq %r14
-; SCALAR-NEXT:    pushq %rbx
 ; SCALAR-NEXT:    movq (%rdi), %rax
 ; SCALAR-NEXT:    movq 8(%rdi), %rcx
-; SCALAR-NEXT:    movq %rax, %r8
-; SCALAR-NEXT:    shrq $32, %r8
-; SCALAR-NEXT:    movq %rax, %r9
-; SCALAR-NEXT:    shrq $48, %r9
-; SCALAR-NEXT:    movq %rcx, %r10
-; SCALAR-NEXT:    shrq $32, %r10
-; SCALAR-NEXT:    movq %rcx, %r11
-; SCALAR-NEXT:    shrq $48, %r11
 ; SCALAR-NEXT:    movq 16(%rdi), %rdi
-; SCALAR-NEXT:    movq %rdi, %rbx
-; SCALAR-NEXT:    shrq $32, %rbx
-; SCALAR-NEXT:    movq %rdi, %r14
-; SCALAR-NEXT:    shrq $48, %r14
-; SCALAR-NEXT:    notl %r14d
-; SCALAR-NEXT:    shll $16, %r14d
-; SCALAR-NEXT:    notl %ebx
-; SCALAR-NEXT:    movzwl %bx, %ebx
-; SCALAR-NEXT:    orl %r14d, %ebx
-; SCALAR-NEXT:    shlq $32, %rbx
-; SCALAR-NEXT:    notl %edi
-; SCALAR-NEXT:    orq %rbx, %rdi
-; SCALAR-NEXT:    notl %r11d
-; SCALAR-NEXT:    shll $16, %r11d
-; SCALAR-NEXT:    notl %r10d
-; SCALAR-NEXT:    movzwl %r10w, %r10d
-; SCALAR-NEXT:    orl %r11d, %r10d
-; SCALAR-NEXT:    shlq $32, %r10
-; SCALAR-NEXT:    notl %ecx
-; SCALAR-NEXT:    orq %r10, %rcx
-; SCALAR-NEXT:    notl %r9d
-; SCALAR-NEXT:    shll $16, %r9d
-; SCALAR-NEXT:    notl %r8d
-; SCALAR-NEXT:    movzwl %r8w, %r8d
-; SCALAR-NEXT:    orl %r9d, %r8d
-; SCALAR-NEXT:    shlq $32, %r8
-; SCALAR-NEXT:    notl %eax
-; SCALAR-NEXT:    orq %r8, %rax
+; SCALAR-NEXT:    notq %rdi
+; SCALAR-NEXT:    notq %rcx
+; SCALAR-NEXT:    notq %rax
 ; SCALAR-NEXT:    movq %rax, (%rsi)
 ; SCALAR-NEXT:    movq %rcx, 8(%rsi)
 ; SCALAR-NEXT:    movq %rdi, 16(%rsi)
@@ -4835,8 +4680,6 @@ define void @vec384_v12i16(ptr %in.subvec.ptr, ptr %out.subvec.ptr, ptr %out.vec
 ; SCALAR-NEXT:    movq %rdi, 48(%rdx)
 ; SCALAR-NEXT:    movq %rcx, 40(%rdx)
 ; SCALAR-NEXT:    movq %rax, 32(%rdx)
-; SCALAR-NEXT:    popq %rbx
-; SCALAR-NEXT:    popq %r14
 ; SCALAR-NEXT:    retq
 ;
 ; SSE2-LABEL: vec384_v12i16:
@@ -5085,144 +4928,9 @@ define void @vec384_v24i8(ptr %in.subvec.ptr, ptr %out.subvec.ptr, ptr %out.vec.
 ; SCALAR-NEXT:    movq (%rdi), %rax
 ; SCALAR-NEXT:    movq 8(%rdi), %rcx
 ; SCALAR-NEXT:    movq 16(%rdi), %rdi
-; SCALAR-NEXT:    movq %rdi, %r8
-; SCALAR-NEXT:    shrq $40, %r8
-; SCALAR-NEXT:    movq %rdi, %r9
-; SCALAR-NEXT:    shrq $56, %r9
-; SCALAR-NEXT:    movq %rdi, %r10
-; SCALAR-NEXT:    shrq $48, %r10
-; SCALAR-NEXT:    notb %r10b
-; SCALAR-NEXT:    movzbl %r10b, %r10d
-; SCALAR-NEXT:    notb %r9b
-; SCALAR-NEXT:    movzbl %r9b, %r9d
-; SCALAR-NEXT:    shll $8, %r9d
-; SCALAR-NEXT:    orl %r10d, %r9d
-; SCALAR-NEXT:    movq %rdi, %r10
-; SCALAR-NEXT:    shrq $32, %r10
-; SCALAR-NEXT:    notb %r10b
-; SCALAR-NEXT:    movzbl %r10b, %r10d
-; SCALAR-NEXT:    notb %r8b
-; SCALAR-NEXT:    movzbl %r8b, %r8d
-; SCALAR-NEXT:    shll $8, %r8d
-; SCALAR-NEXT:    orl %r10d, %r8d
-; SCALAR-NEXT:    movl %edi, %r10d
-; SCALAR-NEXT:    shrl $24, %r10d
-; SCALAR-NEXT:    shll $16, %r9d
-; SCALAR-NEXT:    movzwl %r8w, %r8d
-; SCALAR-NEXT:    orl %r9d, %r8d
-; SCALAR-NEXT:    movl %edi, %r9d
-; SCALAR-NEXT:    shrl $16, %r9d
-; SCALAR-NEXT:    notb %r9b
-; SCALAR-NEXT:    movzbl %r9b, %r9d
-; SCALAR-NEXT:    notb %r10b
-; SCALAR-NEXT:    movzbl %r10b, %r10d
-; SCALAR-NEXT:    shll $8, %r10d
-; SCALAR-NEXT:    orl %r9d, %r10d
-; SCALAR-NEXT:    movl %edi, %r9d
-; SCALAR-NEXT:    shrl $8, %r9d
-; SCALAR-NEXT:    notb %dil
-; SCALAR-NEXT:    movzbl %dil, %edi
-; SCALAR-NEXT:    notb %r9b
-; SCALAR-NEXT:    movzbl %r9b, %r11d
-; SCALAR-NEXT:    shll $8, %r11d
-; SCALAR-NEXT:    orl %edi, %r11d
-; SCALAR-NEXT:    movq %rcx, %r9
-; SCALAR-NEXT:    shrq $40, %r9
-; SCALAR-NEXT:    shll $16, %r10d
-; SCALAR-NEXT:    movzwl %r11w, %edi
-; SCALAR-NEXT:    orl %r10d, %edi
-; SCALAR-NEXT:    movq %rcx, %r10
-; SCALAR-NEXT:    shrq $56, %r10
-; SCALAR-NEXT:    shlq $32, %r8
-; SCALAR-NEXT:    orq %r8, %rdi
-; SCALAR-NEXT:    movq %rcx, %r8
-; SCALAR-NEXT:    shrq $48, %r8
-; SCALAR-NEXT:    notb %r8b
-; SCALAR-NEXT:    movzbl %r8b, %r8d
-; SCALAR-NEXT:    notb %r10b
-; SCALAR-NEXT:    movzbl %r10b, %r10d
-; SCALAR-NEXT:    shll $8, %r10d
-; SCALAR-NEXT:    orl %r8d, %r10d
-; SCALAR-NEXT:    movq %rcx, %r8
-; SCALAR-NEXT:    shrq $32, %r8
-; SCALAR-NEXT:    notb %r8b
-; SCALAR-NEXT:    movzbl %r8b, %r8d
-; SCALAR-NEXT:    notb %r9b
-; SCALAR-NEXT:    movzbl %r9b, %r9d
-; SCALAR-NEXT:    shll $8, %r9d
-; SCALAR-NEXT:    orl %r8d, %r9d
-; SCALAR-NEXT:    movl %ecx, %r11d
-; SCALAR-NEXT:    shrl $24, %r11d
-; SCALAR-NEXT:    shll $16, %r10d
-; SCALAR-NEXT:    movzwl %r9w, %r8d
-; SCALAR-NEXT:    orl %r10d, %r8d
-; SCALAR-NEXT:    movl %ecx, %r9d
-; SCALAR-NEXT:    shrl $16, %r9d
-; SCALAR-NEXT:    notb %r9b
-; SCALAR-NEXT:    movzbl %r9b, %r9d
-; SCALAR-NEXT:    notb %r11b
-; SCALAR-NEXT:    movzbl %r11b, %r10d
-; SCALAR-NEXT:    shll $8, %r10d
-; SCALAR-NEXT:    orl %r9d, %r10d
-; SCALAR-NEXT:    movl %ecx, %r9d
-; SCALAR-NEXT:    shrl $8, %r9d
-; SCALAR-NEXT:    notb %cl
-; SCALAR-NEXT:    movzbl %cl, %ecx
-; SCALAR-NEXT:    notb %r9b
-; SCALAR-NEXT:    movzbl %r9b, %r11d
-; SCALAR-NEXT:    shll $8, %r11d
-; SCALAR-NEXT:    orl %ecx, %r11d
-; SCALAR-NEXT:    movq %rax, %r9
-; SCALAR-NEXT:    shrq $40, %r9
-; SCALAR-NEXT:    shll $16, %r10d
-; SCALAR-NEXT:    movzwl %r11w, %ecx
-; SCALAR-NEXT:    orl %r10d, %ecx
-; SCALAR-NEXT:    movq %rax, %r10
-; SCALAR-NEXT:    shrq $56, %r10
-; SCALAR-NEXT:    shlq $32, %r8
-; SCALAR-NEXT:    orq %r8, %rcx
-; SCALAR-NEXT:    movq %rax, %r8
-; SCALAR-NEXT:    shrq $48, %r8
-; SCALAR-NEXT:    notb %r8b
-; SCALAR-NEXT:    movzbl %r8b, %r8d
-; SCALAR-NEXT:    notb %r10b
-; SCALAR-NEXT:    movzbl %r10b, %r10d
-; SCALAR-NEXT:    shll $8, %r10d
-; SCALAR-NEXT:    orl %r8d, %r10d
-; SCALAR-NEXT:    movq %rax, %r8
-; SCALAR-NEXT:    shrq $32, %r8
-; SCALAR-NEXT:    notb %r8b
-; SCALAR-NEXT:    movzbl %r8b, %r8d
-; SCALAR-NEXT:    notb %r9b
-; SCALAR-NEXT:    movzbl %r9b, %r9d
-; SCALAR-NEXT:    shll $8, %r9d
-; SCALAR-NEXT:    orl %r8d, %r9d
-; SCALAR-NEXT:    movl %eax, %r11d
-; SCALAR-NEXT:    shrl $24, %r11d
-; SCALAR-NEXT:    shll $16, %r10d
-; SCALAR-NEXT:    movzwl %r9w, %r8d
-; SCALAR-NEXT:    orl %r10d, %r8d
-; SCALAR-NEXT:    movl %eax, %r9d
-; SCALAR-NEXT:    shrl $16, %r9d
-; SCALAR-NEXT:    notb %r9b
-; SCALAR-NEXT:    movzbl %r9b, %r9d
-; SCALAR-NEXT:    notb %r11b
-; SCALAR-NEXT:    movzbl %r11b, %r10d
-; SCALAR-NEXT:    shll $8, %r10d
-; SCALAR-NEXT:    orl %r9d, %r10d
-; SCALAR-NEXT:    movl %eax, %r9d
-; SCALAR-NEXT:    shrl $8, %r9d
-; SCALAR-NEXT:    notb %al
-; SCALAR-NEXT:    movzbl %al, %eax
-; SCALAR-NEXT:    notb %r9b
-; SCALAR-NEXT:    movzbl %r9b, %r9d
-; SCALAR-NEXT:    shll $8, %r9d
-; SCALAR-NEXT:    orl %eax, %r9d
-; SCALAR-NEXT:    shll $16, %r10d
-; SCALAR-NEXT:    movzwl %r9w, %eax
-; SCALAR-NEXT:    orl %r10d, %eax
-; SCALAR-NEXT:    shlq $32, %r8
-; SCALAR-NEXT:    orq %r8, %rax
+; SCALAR-NEXT:    notq %rdi
+; SCALAR-NEXT:    notq %rcx
+; SCALAR-NEXT:    notq %rax
 ; SCALAR-NEXT:    movq %rax, (%rsi)
 ; SCALAR-NEXT:    movq %rcx, 8(%rsi)
 ; SCALAR-NEXT:    movq %rdi, 16(%rsi)
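
For reference, a hypothetical caller of the corrected matcher (the helper name matchBitwiseNot and this fragment are illustrative, not part of the patch):

    #include "llvm/CodeGen/SDPatternMatch.h"

    using namespace llvm;
    using namespace llvm::SDPatternMatch;

    // Returns true if N is a bitwise NOT, i.e. xor(V, all-ones), binding V.
    // With the old SpecificInt_match-based m_AllOnes this never matched for
    // types narrower than 64 bits; with AllOnes_match it also handles narrow
    // scalars and all-ones splat vectors.
    static bool matchBitwiseNot(SDNode *N, SDValue &V) {
      return sd_match(N, m_Not(m_Value(V)));
    }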


