[llvm] r312138 - [AVX512] Don't use 32-bit elements version of AND/OR/XOR/ANDN during isel unless we're matching a masked op or broadcast

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 30 09:38:33 PDT 2017


Author: ctopper
Date: Wed Aug 30 09:38:33 2017
New Revision: 312138

URL: http://llvm.org/viewvc/llvm-project?rev=312138&view=rev
Log:
[AVX512] Don't use 32-bit elements version of AND/OR/XOR/ANDN during isel unless we're matching a masked op or broadcast

Selecting 32-bit element logical ops without a select or broadcast requires matching a bitconvert on the inputs to the and. But that's a weird thing to rely on. It's entirely possible that one of the inputs doesn't have a bitcast and one does.

Since there's no functional difference, just remove the extra patterns and save some isel table size.

Differential Revision: https://reviews.llvm.org/D36854

Modified:
    llvm/trunk/lib/Target/X86/X86InstrAVX512.td
    llvm/trunk/test/CodeGen/X86/avx512-arith.ll
    llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
    llvm/trunk/test/CodeGen/X86/avx512-logic.ll
    llvm/trunk/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll
    llvm/trunk/test/CodeGen/X86/vector-bitreverse.ll
    llvm/trunk/test/CodeGen/X86/vector-lzcnt-512.ll
    llvm/trunk/test/CodeGen/X86/vector-rotate-512.ll
    llvm/trunk/test/CodeGen/X86/vector-trunc-math.ll
    llvm/trunk/test/CodeGen/X86/vector-tzcnt-512.ll

Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=312138&r1=312137&r2=312138&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Wed Aug 30 09:38:33 2017
@@ -5094,41 +5094,51 @@ let Predicates = [HasDQI, NoVLX] in {
 // AVX-512  Logical Instructions
 //===----------------------------------------------------------------------===//
 
-multiclass avx512_logic_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
-                           X86VectorVTInfo _, bit IsCommutable = 0> {
+// OpNodeMsk is the OpNode to use when element size is important. OpNode will
+// be set to null_frag for 32-bit elements.
+multiclass avx512_logic_rm<bits<8> opc, string OpcodeStr,
+                           SDPatternOperator OpNode,
+                           SDNode OpNodeMsk, X86VectorVTInfo _,
+                           bit IsCommutable = 0> {
+  let hasSideEffects = 0 in
   defm rr : AVX512_maskable_logic<opc, MRMSrcReg, _, (outs _.RC:$dst),
                     (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                     "$src2, $src1", "$src1, $src2",
                     (_.i64VT (OpNode (bitconvert (_.VT _.RC:$src1)),
                                      (bitconvert (_.VT _.RC:$src2)))),
-                    (_.VT (bitconvert (_.i64VT (OpNode _.RC:$src1,
-                                                       _.RC:$src2)))),
+                    (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1,
+                                                          _.RC:$src2)))),
                     IIC_SSE_BIT_P_RR, IsCommutable>,
             AVX512BIBase, EVEX_4V;
 
+  let hasSideEffects = 0, mayLoad = 1 in
   defm rm : AVX512_maskable_logic<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (_.i64VT (OpNode (bitconvert (_.VT _.RC:$src1)),
                                    (bitconvert (_.LdFrag addr:$src2)))),
-                  (_.VT (bitconvert (_.i64VT (OpNode _.RC:$src1,
+                  (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1,
                                      (bitconvert (_.LdFrag addr:$src2)))))),
                   IIC_SSE_BIT_P_RM>,
             AVX512BIBase, EVEX_4V;
 }
 
-multiclass avx512_logic_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
-                            X86VectorVTInfo _, bit IsCommutable = 0> :
-           avx512_logic_rm<opc, OpcodeStr, OpNode, _, IsCommutable> {
+// OpNodeMsk is the OpNode to use where element size is important. So use
+// for all of the broadcast patterns.
+multiclass avx512_logic_rmb<bits<8> opc, string OpcodeStr,
+                            SDPatternOperator OpNode,
+                            SDNode OpNodeMsk, X86VectorVTInfo _,
+                            bit IsCommutable = 0> :
+           avx512_logic_rm<opc, OpcodeStr, OpNode, OpNodeMsk, _, IsCommutable> {
   defm rmb : AVX512_maskable_logic<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                   "${src2}"##_.BroadcastStr##", $src1",
                   "$src1, ${src2}"##_.BroadcastStr,
-                  (_.i64VT (OpNode _.RC:$src1,
+                  (_.i64VT (OpNodeMsk _.RC:$src1,
                                    (bitconvert
                                     (_.VT (X86VBroadcast
                                             (_.ScalarLdFrag addr:$src2)))))),
-                  (_.VT (bitconvert (_.i64VT (OpNode _.RC:$src1,
+                  (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1,
                                      (bitconvert
                                       (_.VT (X86VBroadcast
                                              (_.ScalarLdFrag addr:$src2)))))))),
@@ -5136,38 +5146,30 @@ multiclass avx512_logic_rmb<bits<8> opc,
              AVX512BIBase, EVEX_4V, EVEX_B;
 }
 
-multiclass avx512_logic_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
-                               AVX512VLVectorVTInfo VTInfo,
+multiclass avx512_logic_rmb_vl<bits<8> opc, string OpcodeStr,
+                               SDPatternOperator OpNode,
+                               SDNode OpNodeMsk, AVX512VLVectorVTInfo VTInfo,
                                bit IsCommutable = 0> {
   let Predicates = [HasAVX512] in
-    defm Z : avx512_logic_rmb<opc, OpcodeStr, OpNode, VTInfo.info512,
+    defm Z : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, VTInfo.info512,
                              IsCommutable>, EVEX_V512;
 
   let Predicates = [HasAVX512, HasVLX] in {
-    defm Z256 : avx512_logic_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
-                             IsCommutable>, EVEX_V256;
-    defm Z128 : avx512_logic_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
-                             IsCommutable>, EVEX_V128;
+    defm Z256 : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk,
+                                 VTInfo.info256, IsCommutable>, EVEX_V256;
+    defm Z128 : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk,
+                                 VTInfo.info128, IsCommutable>, EVEX_V128;
   }
 }
 
-multiclass avx512_logic_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
-                                bit IsCommutable = 0> {
-  defm NAME : avx512_logic_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
-                                  IsCommutable>, EVEX_CD8<32, CD8VF>;
-}
-
-multiclass avx512_logic_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
-                                bit IsCommutable = 0> {
-  defm NAME : avx512_logic_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
-                                  IsCommutable>,
-                                  VEX_W, EVEX_CD8<64, CD8VF>;
-}
-
 multiclass avx512_logic_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
                                  SDNode OpNode, bit IsCommutable = 0> {
-  defm Q : avx512_logic_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, IsCommutable>;
-  defm D : avx512_logic_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, IsCommutable>;
+  defm Q : avx512_logic_rmb_vl<opc_q, OpcodeStr#"q", OpNode, OpNode,
+                               avx512vl_i64_info, IsCommutable>,
+                               VEX_W, EVEX_CD8<64, CD8VF>;
+  defm D : avx512_logic_rmb_vl<opc_d, OpcodeStr#"d", null_frag, OpNode,
+                               avx512vl_i32_info, IsCommutable>,
+                               EVEX_CD8<32, CD8VF>;
 }
 
 defm VPAND : avx512_logic_rm_vl_dq<0xDB, 0xDB, "vpand", and, 1>;

Modified: llvm/trunk/test/CodeGen/X86/avx512-arith.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-arith.ll?rev=312138&r1=312137&r2=312138&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-arith.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-arith.ll Wed Aug 30 09:38:33 2017
@@ -607,17 +607,17 @@ define <8 x i64> @orq_broadcast(<8 x i64
 define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
 ; AVX512F-LABEL: andd512fold:
 ; AVX512F:       # BB#0: # %entry
-; AVX512F-NEXT:    vpandd (%rdi), %zmm0, %zmm0
+; AVX512F-NEXT:    vpandq (%rdi), %zmm0, %zmm0
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512VL-LABEL: andd512fold:
 ; AVX512VL:       # BB#0: # %entry
-; AVX512VL-NEXT:    vpandd (%rdi), %zmm0, %zmm0
+; AVX512VL-NEXT:    vpandq (%rdi), %zmm0, %zmm0
 ; AVX512VL-NEXT:    retq
 ;
 ; AVX512BW-LABEL: andd512fold:
 ; AVX512BW:       # BB#0: # %entry
-; AVX512BW-NEXT:    vpandd (%rdi), %zmm0, %zmm0
+; AVX512BW-NEXT:    vpandq (%rdi), %zmm0, %zmm0
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512DQ-LABEL: andd512fold:

Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll?rev=312138&r1=312137&r2=312138&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll Wed Aug 30 09:38:33 2017
@@ -959,7 +959,7 @@ define void @test_storent_ps_512(<16 x f
 define <16 x i32> @test_xor_epi32(<16 x i32> %a, <16 x i32> %b) {
 ; CHECK-LABEL: test_xor_epi32:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpxord %zmm1, %zmm0, %zmm0
+; CHECK-NEXT:    vpxorq %zmm1, %zmm0, %zmm0
 ; CHECK-NEXT:    retq
   %res = call <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
   ret < 16 x i32> %res
@@ -981,7 +981,7 @@ declare <16 x i32> @llvm.x86.avx512.mask
 define <16 x i32> @test_or_epi32(<16 x i32> %a, <16 x i32> %b) {
 ; CHECK-LABEL: test_or_epi32:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpord %zmm1, %zmm0, %zmm0
+; CHECK-NEXT:    vporq %zmm1, %zmm0, %zmm0
 ; CHECK-NEXT:    retq
   %res = call <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
   ret < 16 x i32> %res
@@ -1003,7 +1003,7 @@ declare <16 x i32> @llvm.x86.avx512.mask
 define <16 x i32> @test_and_epi32(<16 x i32> %a, <16 x i32> %b) {
 ; CHECK-LABEL: test_and_epi32:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpandd %zmm1, %zmm0, %zmm0
+; CHECK-NEXT:    vpandq %zmm1, %zmm0, %zmm0
 ; CHECK-NEXT:    retq
   %res = call <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
   ret < 16 x i32> %res

Modified: llvm/trunk/test/CodeGen/X86/avx512-logic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-logic.ll?rev=312138&r1=312137&r2=312138&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-logic.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-logic.ll Wed Aug 30 09:38:33 2017
@@ -7,7 +7,7 @@ define <16 x i32> @vpandd(<16 x i32> %a,
 ; ALL-LABEL: vpandd:
 ; ALL:       ## BB#0: ## %entry
 ; ALL-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
-; ALL-NEXT:    vpandd %zmm1, %zmm0, %zmm0
+; ALL-NEXT:    vpandq %zmm1, %zmm0, %zmm0
 ; ALL-NEXT:    retq
 entry:
   ; Force the execution domain with an add.
@@ -21,7 +21,7 @@ define <16 x i32> @vpandnd(<16 x i32> %a
 ; ALL-LABEL: vpandnd:
 ; ALL:       ## BB#0: ## %entry
 ; ALL-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
-; ALL-NEXT:    vpandnd %zmm0, %zmm1, %zmm0
+; ALL-NEXT:    vpandnq %zmm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
 entry:
   ; Force the execution domain with an add.
@@ -37,7 +37,7 @@ define <16 x i32> @vpord(<16 x i32> %a,
 ; ALL-LABEL: vpord:
 ; ALL:       ## BB#0: ## %entry
 ; ALL-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
-; ALL-NEXT:    vpord %zmm1, %zmm0, %zmm0
+; ALL-NEXT:    vporq %zmm1, %zmm0, %zmm0
 ; ALL-NEXT:    retq
 entry:
   ; Force the execution domain with an add.
@@ -51,7 +51,7 @@ define <16 x i32> @vpxord(<16 x i32> %a,
 ; ALL-LABEL: vpxord:
 ; ALL:       ## BB#0: ## %entry
 ; ALL-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
-; ALL-NEXT:    vpxord %zmm1, %zmm0, %zmm0
+; ALL-NEXT:    vpxorq %zmm1, %zmm0, %zmm0
 ; ALL-NEXT:    retq
 entry:
   ; Force the execution domain with an add.
@@ -132,7 +132,7 @@ define <8 x i64> @orq_broadcast(<8 x i64
 define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
 ; KNL-LABEL: andd512fold:
 ; KNL:       ## BB#0: ## %entry
-; KNL-NEXT:    vpandd (%rdi), %zmm0, %zmm0
+; KNL-NEXT:    vpandq (%rdi), %zmm0, %zmm0
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: andd512fold:

Modified: llvm/trunk/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll?rev=312138&r1=312137&r2=312138&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll (original)
+++ llvm/trunk/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll Wed Aug 30 09:38:33 2017
@@ -1335,7 +1335,7 @@ define <16 x i32> @f16xi32_i128(<16 x i3
 ; AVX512-NEXT:    vbroadcasti32x4 {{.*#+}} zmm1 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
 ; AVX512-NEXT:    # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
 ; AVX512-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vpandd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
 ; AVX512-NEXT:    retl
 ;
 ; AVX-64-LABEL: f16xi32_i128:
@@ -1369,7 +1369,7 @@ define <16 x i32> @f16xi32_i128(<16 x i3
 ; AVX512F-64-NEXT:    vbroadcasti32x4 {{.*#+}} zmm1 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
 ; AVX512F-64-NEXT:    # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
 ; AVX512F-64-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
-; AVX512F-64-NEXT:    vpandd %zmm1, %zmm0, %zmm0
+; AVX512F-64-NEXT:    vpandq %zmm1, %zmm0, %zmm0
 ; AVX512F-64-NEXT:    retq
   %res1 = add <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>, %a
   %res2 = and <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>, %res1

Modified: llvm/trunk/test/CodeGen/X86/vector-bitreverse.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-bitreverse.ll?rev=312138&r1=312137&r2=312138&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-bitreverse.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-bitreverse.ll Wed Aug 30 09:38:33 2017
@@ -2051,17 +2051,17 @@ define <16 x i32> @test_bitreverse_v16i3
 ; AVX512F-NEXT:    vpslld $4, %zmm1, %zmm1
 ; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0
 ; AVX512F-NEXT:    vpsrld $4, %zmm0, %zmm0
-; AVX512F-NEXT:    vpord %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT:    vporq %zmm1, %zmm0, %zmm0
 ; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm1
 ; AVX512F-NEXT:    vpslld $2, %zmm1, %zmm1
 ; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0
 ; AVX512F-NEXT:    vpsrld $2, %zmm0, %zmm0
-; AVX512F-NEXT:    vpord %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT:    vporq %zmm1, %zmm0, %zmm0
 ; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm1
 ; AVX512F-NEXT:    vpslld $1, %zmm1, %zmm1
 ; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0
 ; AVX512F-NEXT:    vpsrld $1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpord %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT:    vporq %zmm1, %zmm0, %zmm0
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512BW-LABEL: test_bitreverse_v16i32:

Modified: llvm/trunk/test/CodeGen/X86/vector-lzcnt-512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-lzcnt-512.ll?rev=312138&r1=312137&r2=312138&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-lzcnt-512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-lzcnt-512.ll Wed Aug 30 09:38:33 2017
@@ -176,7 +176,7 @@ define <16 x i32> @testv16i32(<16 x i32>
 ; AVX512BW-LABEL: testv16i32:
 ; AVX512BW:       # BB#0:
 ; AVX512BW-NEXT:    vpsrld $1, %zmm0, %zmm1
-; AVX512BW-NEXT:    vpord %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vporq %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpsrld $2, %zmm0, %zmm1
 ; AVX512BW-NEXT:    vporq %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpsrld $4, %zmm0, %zmm1
@@ -206,7 +206,7 @@ define <16 x i32> @testv16i32(<16 x i32>
 ; AVX512DQ-LABEL: testv16i32:
 ; AVX512DQ:       # BB#0:
 ; AVX512DQ-NEXT:    vpsrld $1, %zmm0, %zmm1
-; AVX512DQ-NEXT:    vpord %zmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT:    vporq %zmm1, %zmm0, %zmm0
 ; AVX512DQ-NEXT:    vpsrld $2, %zmm0, %zmm1
 ; AVX512DQ-NEXT:    vporq %zmm1, %zmm0, %zmm0
 ; AVX512DQ-NEXT:    vpsrld $4, %zmm0, %zmm1
@@ -263,7 +263,7 @@ define <16 x i32> @testv16i32u(<16 x i32
 ; AVX512BW-LABEL: testv16i32u:
 ; AVX512BW:       # BB#0:
 ; AVX512BW-NEXT:    vpsrld $1, %zmm0, %zmm1
-; AVX512BW-NEXT:    vpord %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vporq %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpsrld $2, %zmm0, %zmm1
 ; AVX512BW-NEXT:    vporq %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpsrld $4, %zmm0, %zmm1
@@ -293,7 +293,7 @@ define <16 x i32> @testv16i32u(<16 x i32
 ; AVX512DQ-LABEL: testv16i32u:
 ; AVX512DQ:       # BB#0:
 ; AVX512DQ-NEXT:    vpsrld $1, %zmm0, %zmm1
-; AVX512DQ-NEXT:    vpord %zmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT:    vporq %zmm1, %zmm0, %zmm0
 ; AVX512DQ-NEXT:    vpsrld $2, %zmm0, %zmm1
 ; AVX512DQ-NEXT:    vporq %zmm1, %zmm0, %zmm0
 ; AVX512DQ-NEXT:    vpsrld $4, %zmm0, %zmm1

Modified: llvm/trunk/test/CodeGen/X86/vector-rotate-512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-rotate-512.ll?rev=312138&r1=312137&r2=312138&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-rotate-512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-rotate-512.ll Wed Aug 30 09:38:33 2017
@@ -696,7 +696,7 @@ define <16 x i32> @splatconstant_rotate_
 ; AVX512-LABEL: splatconstant_rotate_mask_v16i32:
 ; AVX512:       # BB#0:
 ; AVX512-NEXT:    vprold $4, %zmm0, %zmm0
-; AVX512-NEXT:    vpandd {{.*}}(%rip), %zmm0, %zmm0
+; AVX512-NEXT:    vpandq {{.*}}(%rip), %zmm0, %zmm0
 ; AVX512-NEXT:    retq
   %shl = shl <16 x i32> %a, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
   %lshr = lshr <16 x i32> %a, <i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28>

Modified: llvm/trunk/test/CodeGen/X86/vector-trunc-math.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-trunc-math.ll?rev=312138&r1=312137&r2=312138&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-trunc-math.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-trunc-math.ll Wed Aug 30 09:38:33 2017
@@ -3116,7 +3116,7 @@ define <16 x i8> @trunc_and_v16i32_v16i8
 ;
 ; AVX512-LABEL: trunc_and_v16i32_v16i8:
 ; AVX512:       # BB#0:
-; AVX512-NEXT:    vpandd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
 ; AVX512-NEXT:    vpmovdb %zmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
@@ -3830,7 +3830,7 @@ define <16 x i8> @trunc_xor_v16i32_v16i8
 ;
 ; AVX512-LABEL: trunc_xor_v16i32_v16i8:
 ; AVX512:       # BB#0:
-; AVX512-NEXT:    vpxord %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpxorq %zmm1, %zmm0, %zmm0
 ; AVX512-NEXT:    vpmovdb %zmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
@@ -4544,7 +4544,7 @@ define <16 x i8> @trunc_or_v16i32_v16i8(
 ;
 ; AVX512-LABEL: trunc_or_v16i32_v16i8:
 ; AVX512:       # BB#0:
-; AVX512-NEXT:    vpord %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vporq %zmm1, %zmm0, %zmm0
 ; AVX512-NEXT:    vpmovdb %zmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq

Modified: llvm/trunk/test/CodeGen/X86/vector-tzcnt-512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-tzcnt-512.ll?rev=312138&r1=312137&r2=312138&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-tzcnt-512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-tzcnt-512.ll Wed Aug 30 09:38:33 2017
@@ -139,7 +139,7 @@ define <16 x i32> @testv16i32(<16 x i32>
 ; AVX512CD:       # BB#0:
 ; AVX512CD-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512CD-NEXT:    vpsubd %zmm0, %zmm1, %zmm1
-; AVX512CD-NEXT:    vpandd %zmm1, %zmm0, %zmm0
+; AVX512CD-NEXT:    vpandq %zmm1, %zmm0, %zmm0
 ; AVX512CD-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1
 ; AVX512CD-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
 ; AVX512CD-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
@@ -175,7 +175,7 @@ define <16 x i32> @testv16i32(<16 x i32>
 ; AVX512CDBW:       # BB#0:
 ; AVX512CDBW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512CDBW-NEXT:    vpsubd %zmm0, %zmm1, %zmm2
-; AVX512CDBW-NEXT:    vpandd %zmm2, %zmm0, %zmm0
+; AVX512CDBW-NEXT:    vpandq %zmm2, %zmm0, %zmm0
 ; AVX512CDBW-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2
 ; AVX512CDBW-NEXT:    vpaddd %zmm2, %zmm0, %zmm0
 ; AVX512CDBW-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
@@ -197,7 +197,7 @@ define <16 x i32> @testv16i32(<16 x i32>
 ; AVX512BW:       # BB#0:
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsubd %zmm0, %zmm1, %zmm2
-; AVX512BW-NEXT:    vpandd %zmm2, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpandq %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2
 ; AVX512BW-NEXT:    vpaddd %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
@@ -219,7 +219,7 @@ define <16 x i32> @testv16i32(<16 x i32>
 ; AVX512VPOPCNTDQ:       # BB#0:
 ; AVX512VPOPCNTDQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512VPOPCNTDQ-NEXT:    vpsubd %zmm0, %zmm1, %zmm1
-; AVX512VPOPCNTDQ-NEXT:    vpandd %zmm1, %zmm0, %zmm0
+; AVX512VPOPCNTDQ-NEXT:    vpandq %zmm1, %zmm0, %zmm0
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1
 ; AVX512VPOPCNTDQ-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
@@ -233,7 +233,7 @@ define <16 x i32> @testv16i32u(<16 x i32
 ; AVX512CD:       # BB#0:
 ; AVX512CD-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512CD-NEXT:    vpsubd %zmm0, %zmm1, %zmm1
-; AVX512CD-NEXT:    vpandd %zmm1, %zmm0, %zmm0
+; AVX512CD-NEXT:    vpandq %zmm1, %zmm0, %zmm0
 ; AVX512CD-NEXT:    vplzcntd %zmm0, %zmm0
 ; AVX512CD-NEXT:    vpbroadcastd {{.*#+}} zmm1 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
 ; AVX512CD-NEXT:    vpsubd %zmm0, %zmm1, %zmm0
@@ -243,7 +243,7 @@ define <16 x i32> @testv16i32u(<16 x i32
 ; AVX512CDBW:       # BB#0:
 ; AVX512CDBW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512CDBW-NEXT:    vpsubd %zmm0, %zmm1, %zmm1
-; AVX512CDBW-NEXT:    vpandd %zmm1, %zmm0, %zmm0
+; AVX512CDBW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
 ; AVX512CDBW-NEXT:    vplzcntd %zmm0, %zmm0
 ; AVX512CDBW-NEXT:    vpbroadcastd {{.*#+}} zmm1 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
 ; AVX512CDBW-NEXT:    vpsubd %zmm0, %zmm1, %zmm0
@@ -253,7 +253,7 @@ define <16 x i32> @testv16i32u(<16 x i32
 ; AVX512BW:       # BB#0:
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsubd %zmm0, %zmm1, %zmm2
-; AVX512BW-NEXT:    vpandd %zmm2, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpandq %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2
 ; AVX512BW-NEXT:    vpaddd %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
@@ -275,7 +275,7 @@ define <16 x i32> @testv16i32u(<16 x i32
 ; AVX512VPOPCNTDQ:       # BB#0:
 ; AVX512VPOPCNTDQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512VPOPCNTDQ-NEXT:    vpsubd %zmm0, %zmm1, %zmm1
-; AVX512VPOPCNTDQ-NEXT:    vpandd %zmm1, %zmm0, %zmm0
+; AVX512VPOPCNTDQ-NEXT:    vpandq %zmm1, %zmm0, %zmm0
 ; AVX512VPOPCNTDQ-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1
 ; AVX512VPOPCNTDQ-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
 ; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0




More information about the llvm-commits mailing list