[llvm-branch-commits] [llvm] eb64697 - [X86][Codegen] Correct the domain of VP2INTERSECT

Shengchen Kan via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Thu Nov 30 09:46:45 PST 2023


Author: Shengchen Kan
Date: 2023-11-30T17:56:21+08:00
New Revision: eb64697a7b75d2b22041cc992fad0c8dfa7989cb

URL: https://github.com/llvm/llvm-project/commit/eb64697a7b75d2b22041cc992fad0c8dfa7989cb
DIFF: https://github.com/llvm/llvm-project/commit/eb64697a7b75d2b22041cc992fad0c8dfa7989cb.diff

LOG: [X86][Codegen] Correct the domain of VP2INTERSECT

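Change the execution domain of VP2INTERSECTD/VP2INTERSECTQ from GenericDomain
to SSEPackedInt. As the updated tests show, loads and broadcasts feeding these
instructions are now emitted in their integer forms (e.g. vmovdqa/vpbroadcastd
instead of vmovaps/vbroadcastss).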

Found by #73654

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86InstrAVX512.td
    llvm/test/CodeGen/X86/avx512vlvp2intersect-intrinsics.ll
    llvm/test/CodeGen/X86/avx512vp2intersect-intrinsics.ll
    llvm/test/CodeGen/X86/stack-folding-avx512vp2intersect.ll
    llvm/test/CodeGen/X86/vp2intersect_multiple_pairs.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index f325f47d46464c3..0514f0d19506707 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -12875,8 +12875,10 @@ multiclass avx512_vp2intersect<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _
   }
 }
 
+let ExeDomain = SSEPackedInt in {
 defm VP2INTERSECTD : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i32_info>;
 defm VP2INTERSECTQ : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i64_info>, REX_W;
+}
 
 multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr,
                              X86SchedWriteWidths sched,

diff  --git a/llvm/test/CodeGen/X86/avx512vlvp2intersect-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vlvp2intersect-intrinsics.ll
index ef07d30299e9dad..9741972767bcdcd 100644
--- a/llvm/test/CodeGen/X86/avx512vlvp2intersect-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512vlvp2intersect-intrinsics.ll
@@ -84,7 +84,7 @@ define void @test_mm256_2intersect_epi32_p(ptr nocapture readonly %a, ptr nocapt
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x0c]
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x08]
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x04]
-; X86-NEXT:    vmovaps (%edx), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x02]
+; X86-NEXT:    vmovdqa (%edx), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0x02]
 ; X86-NEXT:    vp2intersectd (%ecx), %ymm0, %k0 # encoding: [0x62,0xf2,0x7f,0x28,0x68,0x01]
 ; X86-NEXT:    kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
 ; X86-NEXT:    kmovw %k0, %edx # encoding: [0xc5,0xf8,0x93,0xd0]
@@ -96,7 +96,7 @@ define void @test_mm256_2intersect_epi32_p(ptr nocapture readonly %a, ptr nocapt
 ;
 ; X64-LABEL: test_mm256_2intersect_epi32_p:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    vmovaps (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x07]
+; X64-NEXT:    vmovdqa (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0x07]
 ; X64-NEXT:    vp2intersectd (%rsi), %ymm0, %k0 # encoding: [0x62,0xf2,0x7f,0x28,0x68,0x06]
 ; X64-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
 ; X64-NEXT:    kmovw %k0, %esi # encoding: [0xc5,0xf8,0x93,0xf0]
@@ -125,7 +125,7 @@ define void @test_mm256_2intersect_epi64_p(ptr nocapture readonly %a, ptr nocapt
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x10]
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c]
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi # encoding: [0x8b,0x74,0x24,0x08]
-; X86-NEXT:    vmovaps (%esi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x06]
+; X86-NEXT:    vmovdqa (%esi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0x06]
 ; X86-NEXT:    vp2intersectq (%edx), %ymm0, %k0 # encoding: [0x62,0xf2,0xff,0x28,0x68,0x02]
 ; X86-NEXT:    kshiftlw $12, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd0,0x0c]
 ; X86-NEXT:    kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
@@ -142,7 +142,7 @@ define void @test_mm256_2intersect_epi64_p(ptr nocapture readonly %a, ptr nocapt
 ;
 ; X64-LABEL: test_mm256_2intersect_epi64_p:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    vmovaps (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x07]
+; X64-NEXT:    vmovdqa (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0x07]
 ; X64-NEXT:    vp2intersectq (%rsi), %ymm0, %k0 # encoding: [0x62,0xf2,0xff,0x28,0x68,0x06]
 ; X64-NEXT:    kshiftlw $12, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd0,0x0c]
 ; X64-NEXT:    kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
@@ -175,7 +175,7 @@ define void @test_mm256_2intersect_epi32_b(ptr nocapture readonly %a, ptr nocapt
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x0c]
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x08]
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x04]
-; X86-NEXT:    vbroadcastss (%edx), %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x18,0x02]
+; X86-NEXT:    vpbroadcastd (%edx), %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x58,0x02]
 ; X86-NEXT:    vp2intersectd (%ecx){1to8}, %ymm0, %k0 # encoding: [0x62,0xf2,0x7f,0x38,0x68,0x01]
 ; X86-NEXT:    kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
 ; X86-NEXT:    kmovw %k0, %edx # encoding: [0xc5,0xf8,0x93,0xd0]
@@ -187,7 +187,7 @@ define void @test_mm256_2intersect_epi32_b(ptr nocapture readonly %a, ptr nocapt
 ;
 ; X64-LABEL: test_mm256_2intersect_epi32_b:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    vbroadcastss (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x18,0x07]
+; X64-NEXT:    vpbroadcastd (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x58,0x07]
 ; X64-NEXT:    vp2intersectd (%rsi){1to8}, %ymm0, %k0 # encoding: [0x62,0xf2,0x7f,0x38,0x68,0x06]
 ; X64-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
 ; X64-NEXT:    kmovw %k0, %esi # encoding: [0xc5,0xf8,0x93,0xf0]
@@ -220,7 +220,7 @@ define void @test_mm256_2intersect_epi64_b(ptr nocapture readonly %a, ptr nocapt
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x10]
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c]
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi # encoding: [0x8b,0x74,0x24,0x08]
-; X86-NEXT:    vbroadcastsd (%esi), %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x19,0x06]
+; X86-NEXT:    vpbroadcastq (%esi), %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0x06]
 ; X86-NEXT:    vp2intersectq (%edx){1to4}, %ymm0, %k0 # encoding: [0x62,0xf2,0xff,0x38,0x68,0x02]
 ; X86-NEXT:    kshiftlw $12, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd0,0x0c]
 ; X86-NEXT:    kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
@@ -237,7 +237,7 @@ define void @test_mm256_2intersect_epi64_b(ptr nocapture readonly %a, ptr nocapt
 ;
 ; X64-LABEL: test_mm256_2intersect_epi64_b:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    vbroadcastsd (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x19,0x07]
+; X64-NEXT:    vpbroadcastq (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0x07]
 ; X64-NEXT:    vp2intersectq (%rsi){1to4}, %ymm0, %k0 # encoding: [0x62,0xf2,0xff,0x38,0x68,0x06]
 ; X64-NEXT:    kshiftlw $12, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd0,0x0c]
 ; X64-NEXT:    kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
@@ -362,7 +362,7 @@ define void @test_mm_2intersect_epi32_p(ptr nocapture readonly %a, ptr nocapture
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x10]
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c]
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi # encoding: [0x8b,0x74,0x24,0x08]
-; X86-NEXT:    vmovaps (%esi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x06]
+; X86-NEXT:    vmovdqa (%esi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0x06]
 ; X86-NEXT:    vp2intersectd (%edx), %xmm0, %k0 # encoding: [0x62,0xf2,0x7f,0x08,0x68,0x02]
 ; X86-NEXT:    kshiftlw $12, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd0,0x0c]
 ; X86-NEXT:    kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
@@ -378,7 +378,7 @@ define void @test_mm_2intersect_epi32_p(ptr nocapture readonly %a, ptr nocapture
 ;
 ; X64-LABEL: test_mm_2intersect_epi32_p:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    vmovaps (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07]
+; X64-NEXT:    vmovdqa (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0x07]
 ; X64-NEXT:    vp2intersectd (%rsi), %xmm0, %k0 # encoding: [0x62,0xf2,0x7f,0x08,0x68,0x06]
 ; X64-NEXT:    kshiftlw $12, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd0,0x0c]
 ; X64-NEXT:    kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
@@ -414,7 +414,7 @@ define void @test_mm_2intersect_epi64_p(ptr nocapture readonly %a, ptr nocapture
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x10]
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c]
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi # encoding: [0x8b,0x74,0x24,0x08]
-; X86-NEXT:    vmovaps (%esi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x06]
+; X86-NEXT:    vmovdqa (%esi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0x06]
 ; X86-NEXT:    vp2intersectq (%edx), %xmm0, %k0 # encoding: [0x62,0xf2,0xff,0x08,0x68,0x02]
 ; X86-NEXT:    kshiftlw $14, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd0,0x0e]
 ; X86-NEXT:    kshiftrw $14, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0e]
@@ -430,7 +430,7 @@ define void @test_mm_2intersect_epi64_p(ptr nocapture readonly %a, ptr nocapture
 ;
 ; X64-LABEL: test_mm_2intersect_epi64_p:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    vmovaps (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07]
+; X64-NEXT:    vmovdqa (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0x07]
 ; X64-NEXT:    vp2intersectq (%rsi), %xmm0, %k0 # encoding: [0x62,0xf2,0xff,0x08,0x68,0x06]
 ; X64-NEXT:    kshiftlw $14, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd0,0x0e]
 ; X64-NEXT:    kshiftrw $14, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0e]
@@ -466,7 +466,7 @@ define void @test_mm_2intersect_epi32_b(ptr nocapture readonly %a, ptr nocapture
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x10]
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c]
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi # encoding: [0x8b,0x74,0x24,0x08]
-; X86-NEXT:    vbroadcastss (%esi), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0x06]
+; X86-NEXT:    vpbroadcastd (%esi), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x58,0x06]
 ; X86-NEXT:    vp2intersectd (%edx){1to4}, %xmm0, %k0 # encoding: [0x62,0xf2,0x7f,0x18,0x68,0x02]
 ; X86-NEXT:    kshiftlw $12, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd0,0x0c]
 ; X86-NEXT:    kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
@@ -482,7 +482,7 @@ define void @test_mm_2intersect_epi32_b(ptr nocapture readonly %a, ptr nocapture
 ;
 ; X64-LABEL: test_mm_2intersect_epi32_b:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    vbroadcastss (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0x07]
+; X64-NEXT:    vpbroadcastd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x58,0x07]
 ; X64-NEXT:    vp2intersectd (%rsi){1to4}, %xmm0, %k0 # encoding: [0x62,0xf2,0x7f,0x18,0x68,0x06]
 ; X64-NEXT:    kshiftlw $12, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd0,0x0c]
 ; X64-NEXT:    kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
@@ -522,8 +522,7 @@ define void @test_mm_2intersect_epi64_b(ptr nocapture readonly %a, ptr nocapture
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x10]
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c]
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi # encoding: [0x8b,0x74,0x24,0x08]
-; X86-NEXT:    vmovddup (%esi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0x06]
-; X86-NEXT:    # xmm0 = mem[0,0]
+; X86-NEXT:    vpbroadcastq (%esi), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x59,0x06]
 ; X86-NEXT:    vp2intersectq (%edx){1to2}, %xmm0, %k0 # encoding: [0x62,0xf2,0xff,0x18,0x68,0x02]
 ; X86-NEXT:    kshiftlw $14, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd0,0x0e]
 ; X86-NEXT:    kshiftrw $14, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0e]
@@ -539,8 +538,7 @@ define void @test_mm_2intersect_epi64_b(ptr nocapture readonly %a, ptr nocapture
 ;
 ; X64-LABEL: test_mm_2intersect_epi64_b:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    vmovddup (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0x07]
-; X64-NEXT:    # xmm0 = mem[0,0]
+; X64-NEXT:    vpbroadcastq (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x59,0x07]
 ; X64-NEXT:    vp2intersectq (%rsi){1to2}, %xmm0, %k0 # encoding: [0x62,0xf2,0xff,0x18,0x68,0x06]
 ; X64-NEXT:    kshiftlw $14, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd0,0x0e]
 ; X64-NEXT:    kshiftrw $14, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0e]

diff  --git a/llvm/test/CodeGen/X86/avx512vp2intersect-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vp2intersect-intrinsics.ll
index 39c57e65b48524e..28e3d6dd5d84999 100644
--- a/llvm/test/CodeGen/X86/avx512vp2intersect-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512vp2intersect-intrinsics.ll
@@ -72,7 +72,7 @@ define void @test_mm512_2intersect_epi32_p(ptr nocapture readonly %a, ptr nocapt
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x10]
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c]
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi # encoding: [0x8b,0x74,0x24,0x08]
-; X86-NEXT:    vmovaps (%esi), %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0x06]
+; X86-NEXT:    vmovdqa64 (%esi), %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x06]
 ; X86-NEXT:    vp2intersectd (%edx), %zmm0, %k0 # encoding: [0x62,0xf2,0x7f,0x48,0x68,0x02]
 ; X86-NEXT:    kmovw %k0, (%ecx) # encoding: [0xc5,0xf8,0x91,0x01]
 ; X86-NEXT:    kmovw %k1, (%eax) # encoding: [0xc5,0xf8,0x91,0x08]
@@ -83,7 +83,7 @@ define void @test_mm512_2intersect_epi32_p(ptr nocapture readonly %a, ptr nocapt
 ;
 ; X64-LABEL: test_mm512_2intersect_epi32_p:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    vmovaps (%rdi), %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0x07]
+; X64-NEXT:    vmovdqa64 (%rdi), %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x07]
 ; X64-NEXT:    vp2intersectd (%rsi), %zmm0, %k0 # encoding: [0x62,0xf2,0x7f,0x48,0x68,0x06]
 ; X64-NEXT:    kmovw %k0, (%rdx) # encoding: [0xc5,0xf8,0x91,0x02]
 ; X64-NEXT:    kmovw %k1, (%rcx) # encoding: [0xc5,0xf8,0x91,0x09]
@@ -106,7 +106,7 @@ define void @test_mm512_2intersect_epi64_p(ptr nocapture readonly %a, ptr nocapt
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x0c]
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x08]
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x04]
-; X86-NEXT:    vmovaps (%edx), %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0x02]
+; X86-NEXT:    vmovdqa64 (%edx), %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x02]
 ; X86-NEXT:    vp2intersectq (%ecx), %zmm0, %k0 # encoding: [0x62,0xf2,0xff,0x48,0x68,0x01]
 ; X86-NEXT:    kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
 ; X86-NEXT:    kmovw %k0, %edx # encoding: [0xc5,0xf8,0x93,0xd0]
@@ -118,7 +118,7 @@ define void @test_mm512_2intersect_epi64_p(ptr nocapture readonly %a, ptr nocapt
 ;
 ; X64-LABEL: test_mm512_2intersect_epi64_p:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    vmovaps (%rdi), %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0x07]
+; X64-NEXT:    vmovdqa64 (%rdi), %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x07]
 ; X64-NEXT:    vp2intersectq (%rsi), %zmm0, %k0 # encoding: [0x62,0xf2,0xff,0x48,0x68,0x06]
 ; X64-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
 ; X64-NEXT:    kmovw %k0, %esi # encoding: [0xc5,0xf8,0x93,0xf0]
@@ -148,7 +148,7 @@ define void @test_mm512_2intersect_epi32_b(ptr nocapture readonly %a, ptr nocapt
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x10]
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c]
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi # encoding: [0x8b,0x74,0x24,0x08]
-; X86-NEXT:    vbroadcastss (%esi), %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x18,0x06]
+; X86-NEXT:    vpbroadcastd (%esi), %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x58,0x06]
 ; X86-NEXT:    vp2intersectd (%edx){1to16}, %zmm0, %k0 # encoding: [0x62,0xf2,0x7f,0x58,0x68,0x02]
 ; X86-NEXT:    kmovw %k0, (%ecx) # encoding: [0xc5,0xf8,0x91,0x01]
 ; X86-NEXT:    kmovw %k1, (%eax) # encoding: [0xc5,0xf8,0x91,0x08]
@@ -159,7 +159,7 @@ define void @test_mm512_2intersect_epi32_b(ptr nocapture readonly %a, ptr nocapt
 ;
 ; X64-LABEL: test_mm512_2intersect_epi32_b:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    vbroadcastss (%rdi), %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x18,0x07]
+; X64-NEXT:    vpbroadcastd (%rdi), %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x58,0x07]
 ; X64-NEXT:    vp2intersectd (%rsi){1to16}, %zmm0, %k0 # encoding: [0x62,0xf2,0x7f,0x58,0x68,0x06]
 ; X64-NEXT:    kmovw %k0, (%rdx) # encoding: [0xc5,0xf8,0x91,0x02]
 ; X64-NEXT:    kmovw %k1, (%rcx) # encoding: [0xc5,0xf8,0x91,0x09]
@@ -186,7 +186,7 @@ define void @test_mm512_2intersect_epi64_b(ptr nocapture readonly %a, ptr nocapt
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x0c]
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x08]
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x04]
-; X86-NEXT:    vbroadcastsd (%edx), %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x19,0x02]
+; X86-NEXT:    vpbroadcastq (%edx), %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x59,0x02]
 ; X86-NEXT:    vp2intersectq (%ecx){1to8}, %zmm0, %k0 # encoding: [0x62,0xf2,0xff,0x58,0x68,0x01]
 ; X86-NEXT:    kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
 ; X86-NEXT:    kmovw %k0, %edx # encoding: [0xc5,0xf8,0x93,0xd0]
@@ -198,7 +198,7 @@ define void @test_mm512_2intersect_epi64_b(ptr nocapture readonly %a, ptr nocapt
 ;
 ; X64-LABEL: test_mm512_2intersect_epi64_b:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    vbroadcastsd (%rdi), %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x19,0x07]
+; X64-NEXT:    vpbroadcastq (%rdi), %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x59,0x07]
 ; X64-NEXT:    vp2intersectq (%rsi){1to8}, %zmm0, %k0 # encoding: [0x62,0xf2,0xff,0x58,0x68,0x06]
 ; X64-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
 ; X64-NEXT:    kmovw %k0, %esi # encoding: [0xc5,0xf8,0x93,0xf0]

diff  --git a/llvm/test/CodeGen/X86/stack-folding-avx512vp2intersect.ll b/llvm/test/CodeGen/X86/stack-folding-avx512vp2intersect.ll
index 909a0a4feae0560..e2057a293255bbb 100644
--- a/llvm/test/CodeGen/X86/stack-folding-avx512vp2intersect.ll
+++ b/llvm/test/CodeGen/X86/stack-folding-avx512vp2intersect.ll
@@ -11,7 +11,7 @@ define void @stack_fold_vp2intersectd(ptr %a, <16 x i32> %b, ptr nocapture %m0,
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    vmovaps (%rdi), %zmm0
+; CHECK-NEXT:    vmovdqa64 (%rdi), %zmm0
 ; CHECK-NEXT:    vp2intersectd {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %k0 # 64-byte Folded Reload
 ; CHECK-NEXT:    kmovw %k0, (%rsi)
 ; CHECK-NEXT:    kmovw %k1, (%rdx)
@@ -35,7 +35,7 @@ define void @stack_fold_vp2intersectq(ptr %a, <8 x i64> %b, ptr nocapture %m0, p
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    vmovaps (%rdi), %zmm0
+; CHECK-NEXT:    vmovdqa64 (%rdi), %zmm0
 ; CHECK-NEXT:    vp2intersectq {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %k0 # 64-byte Folded Reload
 ; CHECK-NEXT:    kmovw %k1, %eax
 ; CHECK-NEXT:    kmovw %k0, %ecx
@@ -61,7 +61,7 @@ define void @stack_fold_vp2intersectd_256(ptr %a, <8 x i32> %b, ptr nocapture %m
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    vmovaps (%rdi), %ymm0
+; CHECK-NEXT:    vmovdqa (%rdi), %ymm0
 ; CHECK-NEXT:    vp2intersectd {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %k0 # 32-byte Folded Reload
 ; CHECK-NEXT:    kmovw %k1, %eax
 ; CHECK-NEXT:    kmovw %k0, %ecx
@@ -87,7 +87,7 @@ define void @stack_fold_vp2intersectq_256(ptr %a, <4 x i64> %b, ptr nocapture %m
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    vmovaps (%rdi), %ymm0
+; CHECK-NEXT:    vmovdqa (%rdi), %ymm0
 ; CHECK-NEXT:    vp2intersectq {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %k0 # 32-byte Folded Reload
 ; CHECK-NEXT:    kshiftlw $12, %k0, %k2
 ; CHECK-NEXT:    kshiftrw $12, %k2, %k2
@@ -117,7 +117,7 @@ define void @stack_fold_vp2intersectd_128(ptr %a, <4 x i32> %b, ptr nocapture %m
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    vmovaps (%rdi), %xmm0
+; CHECK-NEXT:    vmovdqa (%rdi), %xmm0
 ; CHECK-NEXT:    vp2intersectd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %k0 # 16-byte Folded Reload
 ; CHECK-NEXT:    kshiftlw $12, %k0, %k2
 ; CHECK-NEXT:    kshiftrw $12, %k2, %k2
@@ -146,7 +146,7 @@ define void @stack_fold_vp2intersectq_128(ptr %a, <2 x i64> %b, ptr nocapture %m
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    vmovaps (%rdi), %xmm0
+; CHECK-NEXT:    vmovdqa (%rdi), %xmm0
 ; CHECK-NEXT:    vp2intersectq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %k0 # 16-byte Folded Reload
 ; CHECK-NEXT:    kshiftlw $14, %k0, %k2
 ; CHECK-NEXT:    kshiftrw $14, %k2, %k2

diff  --git a/llvm/test/CodeGen/X86/vp2intersect_multiple_pairs.ll b/llvm/test/CodeGen/X86/vp2intersect_multiple_pairs.ll
index a2affbd8728c23c..9f2f1d57c2dbc0d 100644
--- a/llvm/test/CodeGen/X86/vp2intersect_multiple_pairs.ll
+++ b/llvm/test/CodeGen/X86/vp2intersect_multiple_pairs.ll
@@ -14,9 +14,9 @@ define void @test(<16 x i32> %a0, <16 x i32> %b0, <16 x i32> %a1, <16 x i32> %b1
 ; X86-NEXT:    andl $-64, %esp
 ; X86-NEXT:    subl $64, %esp
 ; X86-NEXT:    movl 456(%ebp), %esi
-; X86-NEXT:    vmovaps 328(%ebp), %zmm3
-; X86-NEXT:    vmovaps 200(%ebp), %zmm4
-; X86-NEXT:    vmovaps 72(%ebp), %zmm5
+; X86-NEXT:    vmovdqa64 328(%ebp), %zmm3
+; X86-NEXT:    vmovdqa64 200(%ebp), %zmm4
+; X86-NEXT:    vmovdqa64 72(%ebp), %zmm5
 ; X86-NEXT:    vp2intersectd %zmm1, %zmm0, %k0
 ; X86-NEXT:    kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
 ; X86-NEXT:    kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
@@ -70,7 +70,7 @@ define void @test(<16 x i32> %a0, <16 x i32> %b0, <16 x i32> %a1, <16 x i32> %b1
 ; X64-NEXT:    andq $-64, %rsp
 ; X64-NEXT:    subq $64, %rsp
 ; X64-NEXT:    movq %rdi, %rbx
-; X64-NEXT:    vmovaps 16(%rbp), %zmm8
+; X64-NEXT:    vmovdqa64 16(%rbp), %zmm8
 ; X64-NEXT:    vp2intersectd %zmm1, %zmm0, %k0
 ; X64-NEXT:    kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
 ; X64-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill


        


More information about the llvm-branch-commits mailing list