[llvm] r371368 - [X86] Add broadcast load unfolding support for vpcmpeq/vpcmpgt/vpcmp/vpcmpu.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 9 00:46:11 PDT 2019


Author: ctopper
Date: Mon Sep  9 00:46:11 2019
New Revision: 371368

URL: http://llvm.org/viewvc/llvm-project?rev=371368&view=rev
Log:
[X86] Add broadcast load unfolding support for vpcmpeq/vpcmpgt/vpcmp/vpcmpu.

Modified:
    llvm/trunk/lib/Target/X86/X86InstrFoldTables.cpp
    llvm/trunk/test/CodeGen/X86/avx512-broadcast-unfold.ll

Modified: llvm/trunk/lib/Target/X86/X86InstrFoldTables.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFoldTables.cpp?rev=371368&r1=371367&r2=371368&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrFoldTables.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrFoldTables.cpp Mon Sep  9 00:46:11 2019
@@ -5306,6 +5306,30 @@ static const X86MemoryFoldTableEntry Bro
   { X86::VPANDQZ128rr,   X86::VPANDQZ128rmb,   TB_BCAST_Q },
   { X86::VPANDQZ256rr,   X86::VPANDQZ256rmb,   TB_BCAST_Q },
   { X86::VPANDQZrr,      X86::VPANDQZrmb,      TB_BCAST_Q },
+  { X86::VPCMPDZ128rri,  X86::VPCMPDZ128rmib,  TB_BCAST_D },
+  { X86::VPCMPDZ256rri,  X86::VPCMPDZ256rmib,  TB_BCAST_D },
+  { X86::VPCMPDZrri,     X86::VPCMPDZrmib,     TB_BCAST_D },
+  { X86::VPCMPEQDZ128rr, X86::VPCMPEQDZ128rmb, TB_BCAST_D },
+  { X86::VPCMPEQDZ256rr, X86::VPCMPEQDZ256rmb, TB_BCAST_D },
+  { X86::VPCMPEQDZrr,    X86::VPCMPEQDZrmb,    TB_BCAST_D },
+  { X86::VPCMPEQQZ128rr, X86::VPCMPEQQZ128rmb, TB_BCAST_Q },
+  { X86::VPCMPEQQZ256rr, X86::VPCMPEQQZ256rmb, TB_BCAST_Q },
+  { X86::VPCMPEQQZrr,    X86::VPCMPEQQZrmb,    TB_BCAST_Q },
+  { X86::VPCMPGTDZ128rr, X86::VPCMPGTDZ128rmb, TB_BCAST_D },
+  { X86::VPCMPGTDZ256rr, X86::VPCMPGTDZ256rmb, TB_BCAST_D },
+  { X86::VPCMPGTDZrr,    X86::VPCMPGTDZrmb,    TB_BCAST_D },
+  { X86::VPCMPGTQZ128rr, X86::VPCMPGTQZ128rmb, TB_BCAST_Q },
+  { X86::VPCMPGTQZ256rr, X86::VPCMPGTQZ256rmb, TB_BCAST_Q },
+  { X86::VPCMPGTQZrr,    X86::VPCMPGTQZrmb,    TB_BCAST_Q },
+  { X86::VPCMPQZ128rri,  X86::VPCMPQZ128rmib,  TB_BCAST_Q },
+  { X86::VPCMPQZ256rri,  X86::VPCMPQZ256rmib,  TB_BCAST_Q },
+  { X86::VPCMPQZrri,     X86::VPCMPQZrmib,     TB_BCAST_Q },
+  { X86::VPCMPUDZ128rri, X86::VPCMPUDZ128rmib, TB_BCAST_D },
+  { X86::VPCMPUDZ256rri, X86::VPCMPUDZ256rmib, TB_BCAST_D },
+  { X86::VPCMPUDZrri,    X86::VPCMPUDZrmib,    TB_BCAST_D },
+  { X86::VPCMPUQZ128rri, X86::VPCMPUQZ128rmib, TB_BCAST_Q },
+  { X86::VPCMPUQZ256rri, X86::VPCMPUQZ256rmib, TB_BCAST_Q },
+  { X86::VPCMPUQZrri,    X86::VPCMPUQZrmib,    TB_BCAST_Q },
   { X86::VPMAXSDZ128rr,  X86::VPMAXSDZ128rmb,  TB_BCAST_D },
   { X86::VPMAXSDZ256rr,  X86::VPMAXSDZ256rmb,  TB_BCAST_D },
   { X86::VPMAXSDZrr,     X86::VPMAXSDZrmb,     TB_BCAST_D },

Modified: llvm/trunk/test/CodeGen/X86/avx512-broadcast-unfold.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-broadcast-unfold.ll?rev=371368&r1=371367&r2=371368&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-broadcast-unfold.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-broadcast-unfold.ll Mon Sep  9 00:46:11 2019
@@ -3339,13 +3339,14 @@ define void @bcast_unfold_pcmpgt_v4i32(i
 ; CHECK-LABEL: bcast_unfold_pcmpgt_v4i32:
 ; CHECK:       # %bb.0: # %bb
 ; CHECK-NEXT:    movq $-4096, %rax # imm = 0xF000
+; CHECK-NEXT:    vpbroadcastd {{.*#+}} xmm0 = [1,1,1,1]
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  .LBB96_1: # %bb1
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vmovdqu 4096(%rdi,%rax), %xmm0
-; CHECK-NEXT:    vpcmpgtd {{.*}}(%rip){1to4}, %xmm0, %k1
-; CHECK-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm0 {%k1}
-; CHECK-NEXT:    vmovdqu %xmm0, 4096(%rdi,%rax)
+; CHECK-NEXT:    vmovdqu 4096(%rdi,%rax), %xmm1
+; CHECK-NEXT:    vpcmpgtd %xmm0, %xmm1, %k1
+; CHECK-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm1 {%k1}
+; CHECK-NEXT:    vmovdqu %xmm1, 4096(%rdi,%rax)
 ; CHECK-NEXT:    addq $16, %rax
 ; CHECK-NEXT:    jne .LBB96_1
 ; CHECK-NEXT:  # %bb.2: # %bb10
@@ -3374,13 +3375,14 @@ define void @bcast_unfold_pcmpgt_v8i32(i
 ; CHECK-LABEL: bcast_unfold_pcmpgt_v8i32:
 ; CHECK:       # %bb.0: # %bb
 ; CHECK-NEXT:    movq $-4096, %rax # imm = 0xF000
+; CHECK-NEXT:    vpbroadcastd {{.*#+}} ymm0 = [1,1,1,1,1,1,1,1]
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  .LBB97_1: # %bb1
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vmovdqu 4096(%rdi,%rax), %ymm0
-; CHECK-NEXT:    vpcmpgtd {{.*}}(%rip){1to8}, %ymm0, %k1
-; CHECK-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm0 {%k1}
-; CHECK-NEXT:    vmovdqu %ymm0, 4096(%rdi,%rax)
+; CHECK-NEXT:    vmovdqu 4096(%rdi,%rax), %ymm1
+; CHECK-NEXT:    vpcmpgtd %ymm0, %ymm1, %k1
+; CHECK-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm1 {%k1}
+; CHECK-NEXT:    vmovdqu %ymm1, 4096(%rdi,%rax)
 ; CHECK-NEXT:    addq $32, %rax
 ; CHECK-NEXT:    jne .LBB97_1
 ; CHECK-NEXT:  # %bb.2: # %bb10
@@ -3410,13 +3412,14 @@ define void @bcast_unfold_pcmpgt_v16i32(
 ; CHECK-LABEL: bcast_unfold_pcmpgt_v16i32:
 ; CHECK:       # %bb.0: # %bb
 ; CHECK-NEXT:    movq $-4096, %rax # imm = 0xF000
+; CHECK-NEXT:    vpbroadcastd {{.*#+}} zmm0 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  .LBB98_1: # %bb1
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vmovdqu64 4096(%rdi,%rax), %zmm0
-; CHECK-NEXT:    vpcmpgtd {{.*}}(%rip){1to16}, %zmm0, %k1
-; CHECK-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm0 {%k1}
-; CHECK-NEXT:    vmovdqu64 %zmm0, 4096(%rdi,%rax)
+; CHECK-NEXT:    vmovdqu64 4096(%rdi,%rax), %zmm1
+; CHECK-NEXT:    vpcmpgtd %zmm0, %zmm1, %k1
+; CHECK-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm1 {%k1}
+; CHECK-NEXT:    vmovdqu64 %zmm1, 4096(%rdi,%rax)
 ; CHECK-NEXT:    addq $64, %rax
 ; CHECK-NEXT:    jne .LBB98_1
 ; CHECK-NEXT:  # %bb.2: # %bb10
@@ -3481,13 +3484,14 @@ define void @bcast_unfold_pcmpgt_v4i64(i
 ; CHECK-LABEL: bcast_unfold_pcmpgt_v4i64:
 ; CHECK:       # %bb.0: # %bb
 ; CHECK-NEXT:    movq $-8192, %rax # imm = 0xE000
+; CHECK-NEXT:    vpbroadcastq {{.*#+}} ymm0 = [1,1,1,1]
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  .LBB100_1: # %bb1
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vmovdqu 8192(%rdi,%rax), %ymm0
-; CHECK-NEXT:    vpcmpgtq {{.*}}(%rip){1to4}, %ymm0, %k1
-; CHECK-NEXT:    vpbroadcastq {{.*}}(%rip), %ymm0 {%k1}
-; CHECK-NEXT:    vmovdqu %ymm0, 8192(%rdi,%rax)
+; CHECK-NEXT:    vmovdqu 8192(%rdi,%rax), %ymm1
+; CHECK-NEXT:    vpcmpgtq %ymm0, %ymm1, %k1
+; CHECK-NEXT:    vpbroadcastq {{.*}}(%rip), %ymm1 {%k1}
+; CHECK-NEXT:    vmovdqu %ymm1, 8192(%rdi,%rax)
 ; CHECK-NEXT:    addq $32, %rax
 ; CHECK-NEXT:    jne .LBB100_1
 ; CHECK-NEXT:  # %bb.2: # %bb10
@@ -3517,13 +3521,14 @@ define void @bcast_unfold_pcmpgt_v8i64(i
 ; CHECK-LABEL: bcast_unfold_pcmpgt_v8i64:
 ; CHECK:       # %bb.0: # %bb
 ; CHECK-NEXT:    movq $-8192, %rax # imm = 0xE000
+; CHECK-NEXT:    vpbroadcastq {{.*#+}} zmm0 = [1,1,1,1,1,1,1,1]
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  .LBB101_1: # %bb1
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vmovdqu64 8192(%rdi,%rax), %zmm0
-; CHECK-NEXT:    vpcmpgtq {{.*}}(%rip){1to8}, %zmm0, %k1
-; CHECK-NEXT:    vpbroadcastq {{.*}}(%rip), %zmm0 {%k1}
-; CHECK-NEXT:    vmovdqu64 %zmm0, 8192(%rdi,%rax)
+; CHECK-NEXT:    vmovdqu64 8192(%rdi,%rax), %zmm1
+; CHECK-NEXT:    vpcmpgtq %zmm0, %zmm1, %k1
+; CHECK-NEXT:    vpbroadcastq {{.*}}(%rip), %zmm1 {%k1}
+; CHECK-NEXT:    vmovdqu64 %zmm1, 8192(%rdi,%rax)
 ; CHECK-NEXT:    addq $64, %rax
 ; CHECK-NEXT:    jne .LBB101_1
 ; CHECK-NEXT:  # %bb.2: # %bb10
@@ -3553,13 +3558,14 @@ define void @bcast_unfold_pcmpeq_v4i32(i
 ; CHECK-LABEL: bcast_unfold_pcmpeq_v4i32:
 ; CHECK:       # %bb.0: # %bb
 ; CHECK-NEXT:    movq $-4096, %rax # imm = 0xF000
+; CHECK-NEXT:    vpbroadcastd {{.*#+}} xmm0 = [1,1,1,1]
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  .LBB102_1: # %bb1
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vmovdqu 4096(%rdi,%rax), %xmm0
-; CHECK-NEXT:    vpcmpeqd {{.*}}(%rip){1to4}, %xmm0, %k1
-; CHECK-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm0 {%k1}
-; CHECK-NEXT:    vmovdqu %xmm0, 4096(%rdi,%rax)
+; CHECK-NEXT:    vmovdqu 4096(%rdi,%rax), %xmm1
+; CHECK-NEXT:    vpcmpeqd %xmm0, %xmm1, %k1
+; CHECK-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm1 {%k1}
+; CHECK-NEXT:    vmovdqu %xmm1, 4096(%rdi,%rax)
 ; CHECK-NEXT:    addq $16, %rax
 ; CHECK-NEXT:    jne .LBB102_1
 ; CHECK-NEXT:  # %bb.2: # %bb10
@@ -3588,13 +3594,14 @@ define void @bcast_unfold_pcmpeq_v8i32(i
 ; CHECK-LABEL: bcast_unfold_pcmpeq_v8i32:
 ; CHECK:       # %bb.0: # %bb
 ; CHECK-NEXT:    movq $-4096, %rax # imm = 0xF000
+; CHECK-NEXT:    vpbroadcastd {{.*#+}} ymm0 = [1,1,1,1,1,1,1,1]
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  .LBB103_1: # %bb1
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vmovdqu 4096(%rdi,%rax), %ymm0
-; CHECK-NEXT:    vpcmpeqd {{.*}}(%rip){1to8}, %ymm0, %k1
-; CHECK-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm0 {%k1}
-; CHECK-NEXT:    vmovdqu %ymm0, 4096(%rdi,%rax)
+; CHECK-NEXT:    vmovdqu 4096(%rdi,%rax), %ymm1
+; CHECK-NEXT:    vpcmpeqd %ymm0, %ymm1, %k1
+; CHECK-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm1 {%k1}
+; CHECK-NEXT:    vmovdqu %ymm1, 4096(%rdi,%rax)
 ; CHECK-NEXT:    addq $32, %rax
 ; CHECK-NEXT:    jne .LBB103_1
 ; CHECK-NEXT:  # %bb.2: # %bb10
@@ -3624,13 +3631,14 @@ define void @bcast_unfold_pcmpeq_v16i32(
 ; CHECK-LABEL: bcast_unfold_pcmpeq_v16i32:
 ; CHECK:       # %bb.0: # %bb
 ; CHECK-NEXT:    movq $-4096, %rax # imm = 0xF000
+; CHECK-NEXT:    vpbroadcastd {{.*#+}} zmm0 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  .LBB104_1: # %bb1
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vmovdqu64 4096(%rdi,%rax), %zmm0
-; CHECK-NEXT:    vpcmpeqd {{.*}}(%rip){1to16}, %zmm0, %k1
-; CHECK-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm0 {%k1}
-; CHECK-NEXT:    vmovdqu64 %zmm0, 4096(%rdi,%rax)
+; CHECK-NEXT:    vmovdqu64 4096(%rdi,%rax), %zmm1
+; CHECK-NEXT:    vpcmpeqd %zmm0, %zmm1, %k1
+; CHECK-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm1 {%k1}
+; CHECK-NEXT:    vmovdqu64 %zmm1, 4096(%rdi,%rax)
 ; CHECK-NEXT:    addq $64, %rax
 ; CHECK-NEXT:    jne .LBB104_1
 ; CHECK-NEXT:  # %bb.2: # %bb10
@@ -3695,13 +3703,14 @@ define void @bcast_unfold_pcmpeq_v4i64(i
 ; CHECK-LABEL: bcast_unfold_pcmpeq_v4i64:
 ; CHECK:       # %bb.0: # %bb
 ; CHECK-NEXT:    movq $-8192, %rax # imm = 0xE000
+; CHECK-NEXT:    vpbroadcastq {{.*#+}} ymm0 = [1,1,1,1]
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  .LBB106_1: # %bb1
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vmovdqu 8192(%rdi,%rax), %ymm0
-; CHECK-NEXT:    vpcmpeqq {{.*}}(%rip){1to4}, %ymm0, %k1
-; CHECK-NEXT:    vpbroadcastq {{.*}}(%rip), %ymm0 {%k1}
-; CHECK-NEXT:    vmovdqu %ymm0, 8192(%rdi,%rax)
+; CHECK-NEXT:    vmovdqu 8192(%rdi,%rax), %ymm1
+; CHECK-NEXT:    vpcmpeqq %ymm0, %ymm1, %k1
+; CHECK-NEXT:    vpbroadcastq {{.*}}(%rip), %ymm1 {%k1}
+; CHECK-NEXT:    vmovdqu %ymm1, 8192(%rdi,%rax)
 ; CHECK-NEXT:    addq $32, %rax
 ; CHECK-NEXT:    jne .LBB106_1
 ; CHECK-NEXT:  # %bb.2: # %bb10
@@ -3731,13 +3740,14 @@ define void @bcast_unfold_pcmpeq_v8i64(i
 ; CHECK-LABEL: bcast_unfold_pcmpeq_v8i64:
 ; CHECK:       # %bb.0: # %bb
 ; CHECK-NEXT:    movq $-8192, %rax # imm = 0xE000
+; CHECK-NEXT:    vpbroadcastq {{.*#+}} zmm0 = [1,1,1,1,1,1,1,1]
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  .LBB107_1: # %bb1
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vmovdqu64 8192(%rdi,%rax), %zmm0
-; CHECK-NEXT:    vpcmpeqq {{.*}}(%rip){1to8}, %zmm0, %k1
-; CHECK-NEXT:    vpbroadcastq {{.*}}(%rip), %zmm0 {%k1}
-; CHECK-NEXT:    vmovdqu64 %zmm0, 8192(%rdi,%rax)
+; CHECK-NEXT:    vmovdqu64 8192(%rdi,%rax), %zmm1
+; CHECK-NEXT:    vpcmpeqq %zmm0, %zmm1, %k1
+; CHECK-NEXT:    vpbroadcastq {{.*}}(%rip), %zmm1 {%k1}
+; CHECK-NEXT:    vmovdqu64 %zmm1, 8192(%rdi,%rax)
 ; CHECK-NEXT:    addq $64, %rax
 ; CHECK-NEXT:    jne .LBB107_1
 ; CHECK-NEXT:  # %bb.2: # %bb10
@@ -3767,13 +3777,14 @@ define void @bcast_unfold_pcmp_v4i32(i32
 ; CHECK-LABEL: bcast_unfold_pcmp_v4i32:
 ; CHECK:       # %bb.0: # %bb
 ; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    vpbroadcastd {{.*#+}} xmm0 = [1,1,1,1]
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  .LBB108_1: # %bb1
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vmovdqu (%rdi,%rax,4), %xmm0
-; CHECK-NEXT:    vpcmpltd {{.*}}(%rip){1to4}, %xmm0, %k1
-; CHECK-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm0 {%k1}
-; CHECK-NEXT:    vmovdqu %xmm0, (%rdi,%rax,4)
+; CHECK-NEXT:    vmovdqu (%rdi,%rax,4), %xmm1
+; CHECK-NEXT:    vpcmpltd %xmm0, %xmm1, %k1
+; CHECK-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm1 {%k1}
+; CHECK-NEXT:    vmovdqu %xmm1, (%rdi,%rax,4)
 ; CHECK-NEXT:    addq $4, %rax
 ; CHECK-NEXT:    cmpq $1023, %rax # imm = 0x3FF
 ; CHECK-NEXT:    jg .LBB108_1
@@ -3803,13 +3814,14 @@ define void @bcast_unfold_pcmp_v8i32(i32
 ; CHECK-LABEL: bcast_unfold_pcmp_v8i32:
 ; CHECK:       # %bb.0: # %bb
 ; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    vpbroadcastd {{.*#+}} ymm0 = [1,1,1,1,1,1,1,1]
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  .LBB109_1: # %bb1
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vmovdqu (%rdi,%rax,4), %ymm0
-; CHECK-NEXT:    vpcmpltd {{.*}}(%rip){1to8}, %ymm0, %k1
-; CHECK-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm0 {%k1}
-; CHECK-NEXT:    vmovdqu %ymm0, (%rdi,%rax,4)
+; CHECK-NEXT:    vmovdqu (%rdi,%rax,4), %ymm1
+; CHECK-NEXT:    vpcmpltd %ymm0, %ymm1, %k1
+; CHECK-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm1 {%k1}
+; CHECK-NEXT:    vmovdqu %ymm1, (%rdi,%rax,4)
 ; CHECK-NEXT:    addq $8, %rax
 ; CHECK-NEXT:    cmpq $1023, %rax # imm = 0x3FF
 ; CHECK-NEXT:    jg .LBB109_1
@@ -3840,13 +3852,14 @@ define void @bcast_unfold_pcmp_v16i32(i3
 ; CHECK-LABEL: bcast_unfold_pcmp_v16i32:
 ; CHECK:       # %bb.0: # %bb
 ; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    vpbroadcastd {{.*#+}} zmm0 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  .LBB110_1: # %bb1
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vmovdqu64 (%rdi,%rax,4), %zmm0
-; CHECK-NEXT:    vpcmpltd {{.*}}(%rip){1to16}, %zmm0, %k1
-; CHECK-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm0 {%k1}
-; CHECK-NEXT:    vmovdqu64 %zmm0, (%rdi,%rax,4)
+; CHECK-NEXT:    vmovdqu64 (%rdi,%rax,4), %zmm1
+; CHECK-NEXT:    vpcmpltd %zmm0, %zmm1, %k1
+; CHECK-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm1 {%k1}
+; CHECK-NEXT:    vmovdqu64 %zmm1, (%rdi,%rax,4)
 ; CHECK-NEXT:    addq $16, %rax
 ; CHECK-NEXT:    cmpq $1023, %rax # imm = 0x3FF
 ; CHECK-NEXT:    jg .LBB110_1
@@ -3913,13 +3926,14 @@ define void @bcast_unfold_pcmp_v4i64(i64
 ; CHECK-LABEL: bcast_unfold_pcmp_v4i64:
 ; CHECK:       # %bb.0: # %bb
 ; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    vpbroadcastq {{.*#+}} ymm0 = [1,1,1,1]
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  .LBB112_1: # %bb1
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vmovdqu (%rdi,%rax,8), %ymm0
-; CHECK-NEXT:    vpcmpltq {{.*}}(%rip){1to4}, %ymm0, %k1
-; CHECK-NEXT:    vpbroadcastq {{.*}}(%rip), %ymm0 {%k1}
-; CHECK-NEXT:    vmovdqu %ymm0, (%rdi,%rax,8)
+; CHECK-NEXT:    vmovdqu (%rdi,%rax,8), %ymm1
+; CHECK-NEXT:    vpcmpltq %ymm0, %ymm1, %k1
+; CHECK-NEXT:    vpbroadcastq {{.*}}(%rip), %ymm1 {%k1}
+; CHECK-NEXT:    vmovdqu %ymm1, (%rdi,%rax,8)
 ; CHECK-NEXT:    addq $4, %rax
 ; CHECK-NEXT:    cmpq $1023, %rax # imm = 0x3FF
 ; CHECK-NEXT:    jg .LBB112_1
@@ -3950,13 +3964,14 @@ define void @bcast_unfold_pcmp_v8i64(i64
 ; CHECK-LABEL: bcast_unfold_pcmp_v8i64:
 ; CHECK:       # %bb.0: # %bb
 ; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    vpbroadcastq {{.*#+}} zmm0 = [1,1,1,1,1,1,1,1]
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  .LBB113_1: # %bb1
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vmovdqu64 (%rdi,%rax,8), %zmm0
-; CHECK-NEXT:    vpcmpltq {{.*}}(%rip){1to8}, %zmm0, %k1
-; CHECK-NEXT:    vpbroadcastq {{.*}}(%rip), %zmm0 {%k1}
-; CHECK-NEXT:    vmovdqu64 %zmm0, (%rdi,%rax,8)
+; CHECK-NEXT:    vmovdqu64 (%rdi,%rax,8), %zmm1
+; CHECK-NEXT:    vpcmpltq %zmm0, %zmm1, %k1
+; CHECK-NEXT:    vpbroadcastq {{.*}}(%rip), %zmm1 {%k1}
+; CHECK-NEXT:    vmovdqu64 %zmm1, (%rdi,%rax,8)
 ; CHECK-NEXT:    addq $8, %rax
 ; CHECK-NEXT:    cmpq $1023, %rax # imm = 0x3FF
 ; CHECK-NEXT:    jg .LBB113_1
@@ -3987,13 +4002,14 @@ define void @bcast_unfold_pcmpu_v4i32(i3
 ; CHECK-LABEL: bcast_unfold_pcmpu_v4i32:
 ; CHECK:       # %bb.0: # %bb
 ; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    vpbroadcastd {{.*#+}} xmm0 = [2,2,2,2]
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  .LBB114_1: # %bb1
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vmovdqu (%rdi,%rax,4), %xmm0
-; CHECK-NEXT:    vpcmpltud {{.*}}(%rip){1to4}, %xmm0, %k1
-; CHECK-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm0 {%k1}
-; CHECK-NEXT:    vmovdqu %xmm0, (%rdi,%rax,4)
+; CHECK-NEXT:    vmovdqu (%rdi,%rax,4), %xmm1
+; CHECK-NEXT:    vpcmpltud %xmm0, %xmm1, %k1
+; CHECK-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm1 {%k1}
+; CHECK-NEXT:    vmovdqu %xmm1, (%rdi,%rax,4)
 ; CHECK-NEXT:    addq $4, %rax
 ; CHECK-NEXT:    cmpq $1023, %rax # imm = 0x3FF
 ; CHECK-NEXT:    ja .LBB114_1
@@ -4023,13 +4039,14 @@ define void @bcast_unfold_pcmpu_v8i32(i3
 ; CHECK-LABEL: bcast_unfold_pcmpu_v8i32:
 ; CHECK:       # %bb.0: # %bb
 ; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    vpbroadcastd {{.*#+}} ymm0 = [2,2,2,2,2,2,2,2]
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  .LBB115_1: # %bb1
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vmovdqu (%rdi,%rax,4), %ymm0
-; CHECK-NEXT:    vpcmpltud {{.*}}(%rip){1to8}, %ymm0, %k1
-; CHECK-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm0 {%k1}
-; CHECK-NEXT:    vmovdqu %ymm0, (%rdi,%rax,4)
+; CHECK-NEXT:    vmovdqu (%rdi,%rax,4), %ymm1
+; CHECK-NEXT:    vpcmpltud %ymm0, %ymm1, %k1
+; CHECK-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm1 {%k1}
+; CHECK-NEXT:    vmovdqu %ymm1, (%rdi,%rax,4)
 ; CHECK-NEXT:    addq $8, %rax
 ; CHECK-NEXT:    cmpq $1023, %rax # imm = 0x3FF
 ; CHECK-NEXT:    ja .LBB115_1
@@ -4060,13 +4077,14 @@ define void @bcast_unfold_pcmpu_v16i32(i
 ; CHECK-LABEL: bcast_unfold_pcmpu_v16i32:
 ; CHECK:       # %bb.0: # %bb
 ; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    vpbroadcastd {{.*#+}} zmm0 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  .LBB116_1: # %bb1
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vmovdqu64 (%rdi,%rax,4), %zmm0
-; CHECK-NEXT:    vpcmpltud {{.*}}(%rip){1to16}, %zmm0, %k1
-; CHECK-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm0 {%k1}
-; CHECK-NEXT:    vmovdqu64 %zmm0, (%rdi,%rax,4)
+; CHECK-NEXT:    vmovdqu64 (%rdi,%rax,4), %zmm1
+; CHECK-NEXT:    vpcmpltud %zmm0, %zmm1, %k1
+; CHECK-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm1 {%k1}
+; CHECK-NEXT:    vmovdqu64 %zmm1, (%rdi,%rax,4)
 ; CHECK-NEXT:    addq $16, %rax
 ; CHECK-NEXT:    cmpq $1023, %rax # imm = 0x3FF
 ; CHECK-NEXT:    ja .LBB116_1
@@ -4133,13 +4151,14 @@ define void @bcast_unfold_pcmpu_v4i64(i6
 ; CHECK-LABEL: bcast_unfold_pcmpu_v4i64:
 ; CHECK:       # %bb.0: # %bb
 ; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    vpbroadcastq {{.*#+}} ymm0 = [2,2,2,2]
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  .LBB118_1: # %bb1
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vmovdqu (%rdi,%rax,8), %ymm0
-; CHECK-NEXT:    vpcmpltuq {{.*}}(%rip){1to4}, %ymm0, %k1
-; CHECK-NEXT:    vpbroadcastq {{.*}}(%rip), %ymm0 {%k1}
-; CHECK-NEXT:    vmovdqu %ymm0, (%rdi,%rax,8)
+; CHECK-NEXT:    vmovdqu (%rdi,%rax,8), %ymm1
+; CHECK-NEXT:    vpcmpltuq %ymm0, %ymm1, %k1
+; CHECK-NEXT:    vpbroadcastq {{.*}}(%rip), %ymm1 {%k1}
+; CHECK-NEXT:    vmovdqu %ymm1, (%rdi,%rax,8)
 ; CHECK-NEXT:    addq $4, %rax
 ; CHECK-NEXT:    cmpq $1023, %rax # imm = 0x3FF
 ; CHECK-NEXT:    ja .LBB118_1
@@ -4170,13 +4189,14 @@ define void @bcast_unfold_pcmpu_v8i64(i6
 ; CHECK-LABEL: bcast_unfold_pcmpu_v8i64:
 ; CHECK:       # %bb.0: # %bb
 ; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    vpbroadcastq {{.*#+}} zmm0 = [2,2,2,2,2,2,2,2]
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  .LBB119_1: # %bb1
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vmovdqu64 (%rdi,%rax,8), %zmm0
-; CHECK-NEXT:    vpcmpltuq {{.*}}(%rip){1to8}, %zmm0, %k1
-; CHECK-NEXT:    vpbroadcastq {{.*}}(%rip), %zmm0 {%k1}
-; CHECK-NEXT:    vmovdqu64 %zmm0, (%rdi,%rax,8)
+; CHECK-NEXT:    vmovdqu64 (%rdi,%rax,8), %zmm1
+; CHECK-NEXT:    vpcmpltuq %zmm0, %zmm1, %k1
+; CHECK-NEXT:    vpbroadcastq {{.*}}(%rip), %zmm1 {%k1}
+; CHECK-NEXT:    vmovdqu64 %zmm1, (%rdi,%rax,8)
 ; CHECK-NEXT:    addq $8, %rax
 ; CHECK-NEXT:    cmpq $1023, %rax # imm = 0x3FF
 ; CHECK-NEXT:    ja .LBB119_1




More information about the llvm-commits mailing list