[llvm] [X86] VPTERNLOG comments - use "mem" only for full-width loads and "bst32" / "bst64" for broadcasts (PR #143721)
Shamshura Egor via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 11 11:56:33 PDT 2025
https://github.com/egorshamshura updated https://github.com/llvm/llvm-project/pull/143721
From adfc61b22c3dc0ff4490921962f52874668e2b03 Mon Sep 17 00:00:00 2001
From: Shamshura Egor <shamshuraegor at gmail.com>
Date: Wed, 11 Jun 2025 15:05:50 +0000
Subject: [PATCH 1/3] use "mem" only for full-width loads and "bst32" / "bst64" for broadcasts
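
This changes the VPTERNLOG asm comments to distinguish the kind of
memory operand: a full-width load keeps the "mem" placeholder, while
embedded-broadcast operands are now printed as "bst32" (vpternlogd)
or "bst64" (vpternlogq). Internally, Src3Idx becomes a sentinel that
the comment printer maps back to a placeholder name: -1 for a
full-width load ("mem"), -2 for a 32-bit broadcast ("bst32"), and
-3 for a 64-bit broadcast ("bst64").

A representative before/after pair, taken verbatim from the updated
tests:

  Before: vpternlogd {{.*#+}} ymm1 = ymm1 ^ (mem & (ymm1 ^ ymm0))
  After:  vpternlogd {{.*#+}} ymm1 = ymm1 ^ (bst32 & (ymm1 ^ ymm0))
          vpternlogq {{.*#+}} ymm1 = ymm1 ^ (bst64 & (ymm1 ^ ymm0))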
---
.../X86/MCTargetDesc/X86InstComments.cpp | 27 +++++++++++++--
.../any_extend_vector_inreg_of_broadcast.ll | 16 ++++-----
...d_vector_inreg_of_broadcast_from_memory.ll | 16 ++++-----
llvm/test/CodeGen/X86/avgfloors.ll | 12 +++----
llvm/test/CodeGen/X86/avx512-cvt.ll | 2 +-
llvm/test/CodeGen/X86/avx512-logic.ll | 4 +--
llvm/test/CodeGen/X86/avx512fp16-arith.ll | 6 ++--
llvm/test/CodeGen/X86/avx512vl-logic.ll | 8 ++---
llvm/test/CodeGen/X86/combine-bitselect.ll | 6 ++--
llvm/test/CodeGen/X86/combine-or-shuffle.ll | 2 +-
llvm/test/CodeGen/X86/fp-round.ll | 34 +++++++++----------
llvm/test/CodeGen/X86/gfni-funnel-shifts.ll | 12 +++----
llvm/test/CodeGen/X86/gfni-shifts.ll | 6 ++--
llvm/test/CodeGen/X86/midpoint-int-vec-128.ll | 10 +++---
llvm/test/CodeGen/X86/midpoint-int-vec-256.ll | 10 +++---
.../CodeGen/X86/min-legal-vector-width.ll | 12 +++----
llvm/test/CodeGen/X86/pmul.ll | 4 +--
llvm/test/CodeGen/X86/psubus.ll | 6 ++--
llvm/test/CodeGen/X86/sadd_sat_vec.ll | 2 +-
.../CodeGen/X86/srem-seteq-vec-nonsplat.ll | 2 +-
llvm/test/CodeGen/X86/ssub_sat_vec.ll | 4 +--
llvm/test/CodeGen/X86/usub_sat_vec.ll | 2 +-
llvm/test/CodeGen/X86/vector-fshl-128.ll | 12 +++----
llvm/test/CodeGen/X86/vector-fshl-256.ll | 12 +++----
llvm/test/CodeGen/X86/vector-fshl-512.ll | 12 +++----
llvm/test/CodeGen/X86/vector-fshl-rot-128.ll | 12 +++----
llvm/test/CodeGen/X86/vector-fshl-rot-256.ll | 22 ++++++------
llvm/test/CodeGen/X86/vector-fshl-rot-512.ll | 12 +++----
llvm/test/CodeGen/X86/vector-fshr-128.ll | 12 +++----
llvm/test/CodeGen/X86/vector-fshr-256.ll | 12 +++----
llvm/test/CodeGen/X86/vector-fshr-512.ll | 12 +++----
llvm/test/CodeGen/X86/vector-fshr-rot-128.ll | 12 +++----
llvm/test/CodeGen/X86/vector-fshr-rot-256.ll | 24 ++++++-------
llvm/test/CodeGen/X86/vector-fshr-rot-512.ll | 12 +++----
llvm/test/CodeGen/X86/vector-idiv-sdiv-512.ll | 6 ++--
llvm/test/CodeGen/X86/vector-idiv-udiv-512.ll | 2 +-
.../vector-interleaved-store-i16-stride-5.ll | 8 ++---
.../vector-interleaved-store-i16-stride-7.ll | 16 ++++-----
llvm/test/CodeGen/X86/vector-rotate-128.ll | 12 +++----
llvm/test/CodeGen/X86/vector-rotate-256.ll | 22 ++++++------
llvm/test/CodeGen/X86/vector-rotate-512.ll | 32 ++++++++---------
.../test/CodeGen/X86/vector-shift-ashr-128.ll | 4 +--
.../test/CodeGen/X86/vector-shift-ashr-256.ll | 4 +--
.../test/CodeGen/X86/vector-shift-ashr-512.ll | 2 +-
.../CodeGen/X86/vector-shift-ashr-sub128.ll | 12 +++----
llvm/test/CodeGen/X86/vector-shift-shl-256.ll | 2 +-
llvm/test/CodeGen/X86/vector-shift-shl-512.ll | 4 +--
.../test/CodeGen/X86/vector-shuffle-avx512.ll | 2 +-
llvm/test/CodeGen/X86/vselect-pcmp.ll | 4 +--
.../zero_extend_vector_inreg_of_broadcast.ll | 8 ++---
...d_vector_inreg_of_broadcast_from_memory.ll | 8 ++---
51 files changed, 269 insertions(+), 248 deletions(-)
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp
index 4c26fc86f9547..98a05e4c2c0e7 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp
@@ -931,10 +931,18 @@ static bool printPTERNLOGComments(const MCInst *MI, raw_ostream &OS,
// dest, src1, mask, src2, memory, tbl
CASE_PTERNLOG(PTERNLOGD, m)
CASE_PTERNLOG(PTERNLOGQ, m)
+ Src2Idx = NumOperands - 7;
+    Src3Idx = -1; // Full-width memory operand -> "mem".
+ break;
+
CASE_PTERNLOG(PTERNLOGD, mb)
+ Src2Idx = NumOperands - 7;
+    Src3Idx = -2; // 32-bit broadcast operand -> "bst32".
+ break;
+
CASE_PTERNLOG(PTERNLOGQ, mb)
Src2Idx = NumOperands - 7;
- Src3Idx = -1;
+    Src3Idx = -3; // 64-bit broadcast operand -> "bst64".
break;
default:
@@ -943,8 +951,21 @@ static bool printPTERNLOGComments(const MCInst *MI, raw_ostream &OS,
StringRef DestName = getRegName(MI->getOperand(0).getReg());
StringRef Src1Name = getRegName(MI->getOperand(1).getReg());
StringRef Src2Name = getRegName(MI->getOperand(Src2Idx).getReg());
- StringRef Src3Name =
- Src3Idx != -1 ? getRegName(MI->getOperand(Src3Idx).getReg()) : "mem";
+ StringRef Src3Name;
+ switch (Src3Idx) {
+ case -1:
+ Src3Name = "mem";
+ break;
+ case -2:
+ Src3Name = "bst32";
+ break;
+ case -3:
+ Src3Name = "bst64";
+ break;
+ default:
+ Src3Name = getRegName(MI->getOperand(Src3Idx).getReg());
+ break;
+ }
uint8_t TruthTable = MI->getOperand(NumOperands - 1).getImm();
StringRef SrcNames[] = {Src1Name, Src2Name, Src3Name};
diff --git a/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll b/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll
index 7d2915ddc75b1..749a42918b9fb 100644
--- a/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll
+++ b/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll
@@ -1235,7 +1235,7 @@ define void @vec256_i8_widen_to_i32_factor4_broadcast_to_v8i32_factor8(ptr %in.v
; AVX512F-NEXT: vmovdqa (%rdi), %xmm1
; AVX512F-NEXT: vpaddb (%rsi), %xmm1, %xmm1
; AVX512F-NEXT: vpbroadcastb %xmm1, %ymm1
-; AVX512F-NEXT: vpternlogd {{.*#+}} ymm1 = ymm1 ^ (mem & (ymm1 ^ ymm0))
+; AVX512F-NEXT: vpternlogd {{.*#+}} ymm1 = ymm1 ^ (bst32 & (ymm1 ^ ymm0))
; AVX512F-NEXT: vpaddb (%rdx), %ymm1, %ymm0
; AVX512F-NEXT: vmovdqa %ymm0, (%rcx)
; AVX512F-NEXT: vzeroupper
@@ -1248,7 +1248,7 @@ define void @vec256_i8_widen_to_i32_factor4_broadcast_to_v8i32_factor8(ptr %in.v
; AVX512DQ-NEXT: vmovdqa (%rdi), %xmm1
; AVX512DQ-NEXT: vpaddb (%rsi), %xmm1, %xmm1
; AVX512DQ-NEXT: vpbroadcastb %xmm1, %ymm1
-; AVX512DQ-NEXT: vpternlogd {{.*#+}} ymm1 = ymm1 ^ (mem & (ymm1 ^ ymm0))
+; AVX512DQ-NEXT: vpternlogd {{.*#+}} ymm1 = ymm1 ^ (bst32 & (ymm1 ^ ymm0))
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm1, %ymm0
; AVX512DQ-NEXT: vmovdqa %ymm0, (%rcx)
; AVX512DQ-NEXT: vzeroupper
@@ -1359,7 +1359,7 @@ define void @vec256_i8_widen_to_i64_factor8_broadcast_to_v4i64_factor4(ptr %in.v
; AVX512F-NEXT: vmovdqa (%rdi), %xmm1
; AVX512F-NEXT: vpaddb (%rsi), %xmm1, %xmm1
; AVX512F-NEXT: vpbroadcastb %xmm1, %ymm1
-; AVX512F-NEXT: vpternlogq {{.*#+}} ymm1 = ymm1 ^ (mem & (ymm1 ^ ymm0))
+; AVX512F-NEXT: vpternlogq {{.*#+}} ymm1 = ymm1 ^ (bst64 & (ymm1 ^ ymm0))
; AVX512F-NEXT: vpaddb (%rdx), %ymm1, %ymm0
; AVX512F-NEXT: vmovdqa %ymm0, (%rcx)
; AVX512F-NEXT: vzeroupper
@@ -1372,7 +1372,7 @@ define void @vec256_i8_widen_to_i64_factor8_broadcast_to_v4i64_factor4(ptr %in.v
; AVX512DQ-NEXT: vmovdqa (%rdi), %xmm1
; AVX512DQ-NEXT: vpaddb (%rsi), %xmm1, %xmm1
; AVX512DQ-NEXT: vpbroadcastb %xmm1, %ymm1
-; AVX512DQ-NEXT: vpternlogq {{.*#+}} ymm1 = ymm1 ^ (mem & (ymm1 ^ ymm0))
+; AVX512DQ-NEXT: vpternlogq {{.*#+}} ymm1 = ymm1 ^ (bst64 & (ymm1 ^ ymm0))
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm1, %ymm0
; AVX512DQ-NEXT: vmovdqa %ymm0, (%rcx)
; AVX512DQ-NEXT: vzeroupper
@@ -2702,7 +2702,7 @@ define void @vec384_i8_widen_to_i32_factor4_broadcast_to_v12i32_factor12(ptr %in
; AVX512F-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
; AVX512F-NEXT: vpaddb (%rsi), %xmm0, %xmm0
; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0
-; AVX512F-NEXT: vpternlogd {{.*#+}} ymm1 = ymm0 ^ (mem & (ymm1 ^ ymm0))
+; AVX512F-NEXT: vpternlogd {{.*#+}} ymm1 = ymm0 ^ (bst32 & (ymm1 ^ ymm0))
; AVX512F-NEXT: vpaddb (%rdx), %ymm1, %ymm1
; AVX512F-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
; AVX512F-NEXT: vmovdqa %ymm0, 32(%rcx)
@@ -2717,7 +2717,7 @@ define void @vec384_i8_widen_to_i32_factor4_broadcast_to_v12i32_factor12(ptr %in
; AVX512DQ-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
; AVX512DQ-NEXT: vpaddb (%rsi), %xmm0, %xmm0
; AVX512DQ-NEXT: vpbroadcastb %xmm0, %ymm0
-; AVX512DQ-NEXT: vpternlogd {{.*#+}} ymm1 = ymm0 ^ (mem & (ymm1 ^ ymm0))
+; AVX512DQ-NEXT: vpternlogd {{.*#+}} ymm1 = ymm0 ^ (bst32 & (ymm1 ^ ymm0))
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm1, %ymm1
; AVX512DQ-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rcx)
@@ -2964,7 +2964,7 @@ define void @vec384_i8_widen_to_i64_factor8_broadcast_to_v6i64_factor6(ptr %in.v
; AVX512F-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
; AVX512F-NEXT: vpaddb (%rsi), %xmm0, %xmm0
; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0
-; AVX512F-NEXT: vpternlogq {{.*#+}} ymm1 = ymm0 ^ (mem & (ymm1 ^ ymm0))
+; AVX512F-NEXT: vpternlogq {{.*#+}} ymm1 = ymm0 ^ (bst64 & (ymm1 ^ ymm0))
; AVX512F-NEXT: vpaddb (%rdx), %ymm1, %ymm1
; AVX512F-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
; AVX512F-NEXT: vmovdqa %ymm0, 32(%rcx)
@@ -2979,7 +2979,7 @@ define void @vec384_i8_widen_to_i64_factor8_broadcast_to_v6i64_factor6(ptr %in.v
; AVX512DQ-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
; AVX512DQ-NEXT: vpaddb (%rsi), %xmm0, %xmm0
; AVX512DQ-NEXT: vpbroadcastb %xmm0, %ymm0
-; AVX512DQ-NEXT: vpternlogq {{.*#+}} ymm1 = ymm0 ^ (mem & (ymm1 ^ ymm0))
+; AVX512DQ-NEXT: vpternlogq {{.*#+}} ymm1 = ymm0 ^ (bst64 & (ymm1 ^ ymm0))
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm1, %ymm1
; AVX512DQ-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rcx)
diff --git a/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll b/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll
index f5802150d5353..2f780c31e8290 100644
--- a/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll
+++ b/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll
@@ -1020,7 +1020,7 @@ define void @vec256_i8_widen_to_i32_factor4_broadcast_to_v8i32_factor8(ptr %in.e
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa 32(%rdi), %ymm0
; AVX512F-NEXT: vpbroadcastb (%rdi), %ymm1
-; AVX512F-NEXT: vpternlogd {{.*#+}} ymm1 = ymm1 ^ (mem & (ymm1 ^ ymm0))
+; AVX512F-NEXT: vpternlogd {{.*#+}} ymm1 = ymm1 ^ (bst32 & (ymm1 ^ ymm0))
; AVX512F-NEXT: vpaddb (%rsi), %ymm1, %ymm0
; AVX512F-NEXT: vmovdqa %ymm0, (%rdx)
; AVX512F-NEXT: vzeroupper
@@ -1030,7 +1030,7 @@ define void @vec256_i8_widen_to_i32_factor4_broadcast_to_v8i32_factor8(ptr %in.e
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vmovdqa 32(%rdi), %ymm0
; AVX512DQ-NEXT: vpbroadcastb (%rdi), %ymm1
-; AVX512DQ-NEXT: vpternlogd {{.*#+}} ymm1 = ymm1 ^ (mem & (ymm1 ^ ymm0))
+; AVX512DQ-NEXT: vpternlogd {{.*#+}} ymm1 = ymm1 ^ (bst32 & (ymm1 ^ ymm0))
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm1, %ymm0
; AVX512DQ-NEXT: vmovdqa %ymm0, (%rdx)
; AVX512DQ-NEXT: vzeroupper
@@ -1116,7 +1116,7 @@ define void @vec256_i8_widen_to_i64_factor8_broadcast_to_v4i64_factor4(ptr %in.e
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa 32(%rdi), %ymm0
; AVX512F-NEXT: vpbroadcastb (%rdi), %ymm1
-; AVX512F-NEXT: vpternlogq {{.*#+}} ymm1 = ymm1 ^ (mem & (ymm1 ^ ymm0))
+; AVX512F-NEXT: vpternlogq {{.*#+}} ymm1 = ymm1 ^ (bst64 & (ymm1 ^ ymm0))
; AVX512F-NEXT: vpaddb (%rsi), %ymm1, %ymm0
; AVX512F-NEXT: vmovdqa %ymm0, (%rdx)
; AVX512F-NEXT: vzeroupper
@@ -1126,7 +1126,7 @@ define void @vec256_i8_widen_to_i64_factor8_broadcast_to_v4i64_factor4(ptr %in.e
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vmovdqa 32(%rdi), %ymm0
; AVX512DQ-NEXT: vpbroadcastb (%rdi), %ymm1
-; AVX512DQ-NEXT: vpternlogq {{.*#+}} ymm1 = ymm1 ^ (mem & (ymm1 ^ ymm0))
+; AVX512DQ-NEXT: vpternlogq {{.*#+}} ymm1 = ymm1 ^ (bst64 & (ymm1 ^ ymm0))
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm1, %ymm0
; AVX512DQ-NEXT: vmovdqa %ymm0, (%rdx)
; AVX512DQ-NEXT: vzeroupper
@@ -2125,7 +2125,7 @@ define void @vec384_i8_widen_to_i32_factor4_broadcast_to_v12i32_factor12(ptr %in
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa 48(%rdi), %xmm0
; AVX512F-NEXT: vpbroadcastb (%rdi), %ymm1
-; AVX512F-NEXT: vpternlogd {{.*#+}} ymm0 = ymm1 ^ (mem & (ymm0 ^ ymm1))
+; AVX512F-NEXT: vpternlogd {{.*#+}} ymm0 = ymm1 ^ (bst32 & (ymm0 ^ ymm1))
; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX512F-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
; AVX512F-NEXT: vmovdqa %ymm1, 32(%rdx)
@@ -2137,7 +2137,7 @@ define void @vec384_i8_widen_to_i32_factor4_broadcast_to_v12i32_factor12(ptr %in
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vmovdqa 48(%rdi), %xmm0
; AVX512DQ-NEXT: vpbroadcastb (%rdi), %ymm1
-; AVX512DQ-NEXT: vpternlogd {{.*#+}} ymm0 = ymm1 ^ (mem & (ymm0 ^ ymm1))
+; AVX512DQ-NEXT: vpternlogd {{.*#+}} ymm0 = ymm1 ^ (bst32 & (ymm0 ^ ymm1))
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
; AVX512DQ-NEXT: vmovdqa %ymm1, 32(%rdx)
@@ -2346,7 +2346,7 @@ define void @vec384_i8_widen_to_i64_factor8_broadcast_to_v6i64_factor6(ptr %in.e
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa 48(%rdi), %xmm0
; AVX512F-NEXT: vpbroadcastb (%rdi), %ymm1
-; AVX512F-NEXT: vpternlogq {{.*#+}} ymm0 = ymm1 ^ (mem & (ymm0 ^ ymm1))
+; AVX512F-NEXT: vpternlogq {{.*#+}} ymm0 = ymm1 ^ (bst64 & (ymm0 ^ ymm1))
; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX512F-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
; AVX512F-NEXT: vmovdqa %ymm1, 32(%rdx)
@@ -2358,7 +2358,7 @@ define void @vec384_i8_widen_to_i64_factor8_broadcast_to_v6i64_factor6(ptr %in.e
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vmovdqa 48(%rdi), %xmm0
; AVX512DQ-NEXT: vpbroadcastb (%rdi), %ymm1
-; AVX512DQ-NEXT: vpternlogq {{.*#+}} ymm0 = ymm1 ^ (mem & (ymm0 ^ ymm1))
+; AVX512DQ-NEXT: vpternlogq {{.*#+}} ymm0 = ymm1 ^ (bst64 & (ymm0 ^ ymm1))
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
; AVX512DQ-NEXT: vmovdqa %ymm1, 32(%rdx)
diff --git a/llvm/test/CodeGen/X86/avgfloors.ll b/llvm/test/CodeGen/X86/avgfloors.ll
index 0508e5ccb5430..d6edc5d6358a1 100644
--- a/llvm/test/CodeGen/X86/avgfloors.ll
+++ b/llvm/test/CodeGen/X86/avgfloors.ll
@@ -53,7 +53,7 @@ define <16 x i8> @test_fixed_v16i8(<16 x i8> %a0, <16 x i8> %a1) nounwind {
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $1, %xmm0, %xmm0
; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm1 = [64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64]
-; AVX512-NEXT: vpternlogd {{.*#+}} xmm0 = xmm1 ^ (xmm0 & mem)
+; AVX512-NEXT: vpternlogd {{.*#+}} xmm0 = xmm1 ^ (xmm0 & bst32)
; AVX512-NEXT: vpaddb %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vpsubb %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
@@ -108,7 +108,7 @@ define <16 x i8> @test_ext_v16i8(<16 x i8> %a0, <16 x i8> %a1) nounwind {
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $1, %xmm0, %xmm0
; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm1 = [64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64]
-; AVX512-NEXT: vpternlogd {{.*#+}} xmm0 = xmm1 ^ (xmm0 & mem)
+; AVX512-NEXT: vpternlogd {{.*#+}} xmm0 = xmm1 ^ (xmm0 & bst32)
; AVX512-NEXT: vpaddb %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vpsubb %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
@@ -405,7 +405,7 @@ define <32 x i8> @test_fixed_v32i8(<32 x i8> %a0, <32 x i8> %a1) nounwind {
; AVX512-NEXT: vpxor %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpsrlw $1, %ymm0, %ymm0
; AVX512-NEXT: vpbroadcastd {{.*#+}} ymm1 = [64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64]
-; AVX512-NEXT: vpternlogd {{.*#+}} ymm0 = ymm1 ^ (ymm0 & mem)
+; AVX512-NEXT: vpternlogd {{.*#+}} ymm0 = ymm1 ^ (ymm0 & bst32)
; AVX512-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; AVX512-NEXT: vpsubb %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
@@ -478,7 +478,7 @@ define <32 x i8> @test_ext_v32i8(<32 x i8> %a0, <32 x i8> %a1) nounwind {
; AVX512-NEXT: vpxor %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpsrlw $1, %ymm0, %ymm0
; AVX512-NEXT: vpbroadcastd {{.*#+}} ymm1 = [64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64]
-; AVX512-NEXT: vpternlogd {{.*#+}} ymm0 = ymm1 ^ (ymm0 & mem)
+; AVX512-NEXT: vpternlogd {{.*#+}} ymm0 = ymm1 ^ (ymm0 & bst32)
; AVX512-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; AVX512-NEXT: vpsubb %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
@@ -966,7 +966,7 @@ define <64 x i8> @test_fixed_v64i8(<64 x i8> %a0, <64 x i8> %a1) nounwind {
; AVX512-NEXT: vpxorq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpsrlw $1, %zmm0, %zmm0
; AVX512-NEXT: vpbroadcastd {{.*#+}} zmm1 = [64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64]
-; AVX512-NEXT: vpternlogd {{.*#+}} zmm0 = zmm1 ^ (zmm0 & mem)
+; AVX512-NEXT: vpternlogd {{.*#+}} zmm0 = zmm1 ^ (zmm0 & bst32)
; AVX512-NEXT: vpaddb %zmm2, %zmm0, %zmm0
; AVX512-NEXT: vpsubb %zmm1, %zmm0, %zmm0
; AVX512-NEXT: retq
@@ -1078,7 +1078,7 @@ define <64 x i8> @test_ext_v64i8(<64 x i8> %a0, <64 x i8> %a1) nounwind {
; AVX512-NEXT: vpxorq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpsrlw $1, %zmm0, %zmm0
; AVX512-NEXT: vpbroadcastd {{.*#+}} zmm1 = [64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64]
-; AVX512-NEXT: vpternlogd {{.*#+}} zmm0 = zmm1 ^ (zmm0 & mem)
+; AVX512-NEXT: vpternlogd {{.*#+}} zmm0 = zmm1 ^ (zmm0 & bst32)
; AVX512-NEXT: vpaddb %zmm2, %zmm0, %zmm0
; AVX512-NEXT: vpsubb %zmm1, %zmm0, %zmm0
; AVX512-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/avx512-cvt.ll b/llvm/test/CodeGen/X86/avx512-cvt.ll
index a78d97782e6a3..ad68e1ce36949 100644
--- a/llvm/test/CodeGen/X86/avx512-cvt.ll
+++ b/llvm/test/CodeGen/X86/avx512-cvt.ll
@@ -351,7 +351,7 @@ define <8 x double> @ulto8f64(<8 x i64> %a) {
; NODQ-LABEL: ulto8f64:
; NODQ: # %bb.0:
; NODQ-NEXT: vpbroadcastq {{.*#+}} zmm1 = [4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200]
-; NODQ-NEXT: vpternlogq {{.*#+}} zmm1 = zmm1 | (zmm0 & mem)
+; NODQ-NEXT: vpternlogq {{.*#+}} zmm1 = zmm1 | (zmm0 & bst64)
; NODQ-NEXT: vpsrlq $32, %zmm0, %zmm0
; NODQ-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
; NODQ-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
diff --git a/llvm/test/CodeGen/X86/avx512-logic.ll b/llvm/test/CodeGen/X86/avx512-logic.ll
index 23f4fcb1c77c6..ac98e9bd010e7 100644
--- a/llvm/test/CodeGen/X86/avx512-logic.ll
+++ b/llvm/test/CodeGen/X86/avx512-logic.ll
@@ -889,7 +889,7 @@ define <16 x i32> @ternlog_xor_andn(<16 x i32> %x, <16 x i32> %y, <16 x i32> %z)
define <16 x i32> @ternlog_or_and_mask(<16 x i32> %x, <16 x i32> %y) {
; ALL-LABEL: ternlog_or_and_mask:
; ALL: ## %bb.0:
-; ALL-NEXT: vpternlogd {{.*#+}} zmm0 = (zmm0 & mem) | zmm1
+; ALL-NEXT: vpternlogd {{.*#+}} zmm0 = (zmm0 & bst32) | zmm1
; ALL-NEXT: retq
%a = and <16 x i32> %x, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
%b = or <16 x i32> %a, %y
@@ -899,7 +899,7 @@ define <16 x i32> @ternlog_or_and_mask(<16 x i32> %x, <16 x i32> %y) {
define <8 x i64> @ternlog_xor_and_mask(<8 x i64> %x, <8 x i64> %y) {
; ALL-LABEL: ternlog_xor_and_mask:
; ALL: ## %bb.0:
-; ALL-NEXT: vpternlogq {{.*#+}} zmm0 = zmm1 ^ (zmm0 & mem)
+; ALL-NEXT: vpternlogq {{.*#+}} zmm0 = zmm1 ^ (zmm0 & bst64)
; ALL-NEXT: retq
%a = and <8 x i64> %x, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
%b = xor <8 x i64> %a, %y
diff --git a/llvm/test/CodeGen/X86/avx512fp16-arith.ll b/llvm/test/CodeGen/X86/avx512fp16-arith.ll
index b264f5fc34688..cdf6526465fd5 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-arith.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-arith.ll
@@ -384,7 +384,7 @@ declare <8 x half> @llvm.fabs.v8f16(<8 x half>)
define <8 x half> @fcopysignv8f16(<8 x half> %x, <8 x half> %y) {
; CHECK-LABEL: fcopysignv8f16:
; CHECK: ## %bb.0:
-; CHECK-NEXT: vpternlogd {{.*#+}} xmm0 = xmm1 ^ (mem & (xmm0 ^ xmm1))
+; CHECK-NEXT: vpternlogd {{.*#+}} xmm0 = xmm1 ^ (bst32 & (xmm0 ^ xmm1))
; CHECK-NEXT: retq
%a = call <8 x half> @llvm.copysign.v8f16(<8 x half> %x, <8 x half> %y)
ret <8 x half> %a
@@ -439,7 +439,7 @@ declare <16 x half> @llvm.fabs.v16f16(<16 x half>)
define <16 x half> @fcopysignv16f16(<16 x half> %x, <16 x half> %y) {
; CHECK-LABEL: fcopysignv16f16:
; CHECK: ## %bb.0:
-; CHECK-NEXT: vpternlogd {{.*#+}} ymm0 = ymm1 ^ (mem & (ymm0 ^ ymm1))
+; CHECK-NEXT: vpternlogd {{.*#+}} ymm0 = ymm1 ^ (bst32 & (ymm0 ^ ymm1))
; CHECK-NEXT: retq
%a = call <16 x half> @llvm.copysign.v16f16(<16 x half> %x, <16 x half> %y)
ret <16 x half> %a
@@ -494,7 +494,7 @@ declare <32 x half> @llvm.fabs.v32f16(<32 x half>)
define <32 x half> @fcopysignv32f16(<32 x half> %x, <32 x half> %y) {
; CHECK-LABEL: fcopysignv32f16:
; CHECK: ## %bb.0:
-; CHECK-NEXT: vpternlogd {{.*#+}} zmm0 = zmm1 ^ (mem & (zmm0 ^ zmm1))
+; CHECK-NEXT: vpternlogd {{.*#+}} zmm0 = zmm1 ^ (bst32 & (zmm0 ^ zmm1))
; CHECK-NEXT: retq
%a = call <32 x half> @llvm.copysign.v32f16(<32 x half> %x, <32 x half> %y)
ret <32 x half> %a
diff --git a/llvm/test/CodeGen/X86/avx512vl-logic.ll b/llvm/test/CodeGen/X86/avx512vl-logic.ll
index 284a0eb33047c..cbf930fc46498 100644
--- a/llvm/test/CodeGen/X86/avx512vl-logic.ll
+++ b/llvm/test/CodeGen/X86/avx512vl-logic.ll
@@ -1039,7 +1039,7 @@ define <4 x i32> @ternlog_xor_andn(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
define <4 x i32> @ternlog_or_and_mask(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: ternlog_or_and_mask:
; CHECK: ## %bb.0:
-; CHECK-NEXT: vpternlogd {{.*#+}} xmm0 = (xmm0 & mem) | xmm1
+; CHECK-NEXT: vpternlogd {{.*#+}} xmm0 = (xmm0 & bst32) | xmm1
; CHECK-NEXT: retq
%a = and <4 x i32> %x, <i32 255, i32 255, i32 255, i32 255>
%b = or <4 x i32> %a, %y
@@ -1049,7 +1049,7 @@ define <4 x i32> @ternlog_or_and_mask(<4 x i32> %x, <4 x i32> %y) {
define <8 x i32> @ternlog_or_and_mask_ymm(<8 x i32> %x, <8 x i32> %y) {
; CHECK-LABEL: ternlog_or_and_mask_ymm:
; CHECK: ## %bb.0:
-; CHECK-NEXT: vpternlogd {{.*#+}} ymm0 = (ymm0 & mem) | ymm1
+; CHECK-NEXT: vpternlogd {{.*#+}} ymm0 = (ymm0 & bst32) | ymm1
; CHECK-NEXT: retq
%a = and <8 x i32> %x, <i32 -16777216, i32 -16777216, i32 -16777216, i32 -16777216, i32 -16777216, i32 -16777216, i32 -16777216, i32 -16777216>
%b = or <8 x i32> %a, %y
@@ -1059,7 +1059,7 @@ define <8 x i32> @ternlog_or_and_mask_ymm(<8 x i32> %x, <8 x i32> %y) {
define <2 x i64> @ternlog_xor_and_mask(<2 x i64> %x, <2 x i64> %y) {
; CHECK-LABEL: ternlog_xor_and_mask:
; CHECK: ## %bb.0:
-; CHECK-NEXT: vpternlogq {{.*#+}} xmm0 = xmm1 ^ (xmm0 & mem)
+; CHECK-NEXT: vpternlogq {{.*#+}} xmm0 = xmm1 ^ (xmm0 & bst64)
; CHECK-NEXT: retq
%a = and <2 x i64> %x, <i64 1099511627775, i64 1099511627775>
%b = xor <2 x i64> %a, %y
@@ -1069,7 +1069,7 @@ define <2 x i64> @ternlog_xor_and_mask(<2 x i64> %x, <2 x i64> %y) {
define <4 x i64> @ternlog_xor_and_mask_ymm(<4 x i64> %x, <4 x i64> %y) {
; CHECK-LABEL: ternlog_xor_and_mask_ymm:
; CHECK: ## %bb.0:
-; CHECK-NEXT: vpternlogq {{.*#+}} ymm0 = ymm1 ^ (ymm0 & mem)
+; CHECK-NEXT: vpternlogq {{.*#+}} ymm0 = ymm1 ^ (ymm0 & bst64)
; CHECK-NEXT: retq
%a = and <4 x i64> %x, <i64 72057594037927935, i64 72057594037927935, i64 72057594037927935, i64 72057594037927935>
%b = xor <4 x i64> %a, %y
diff --git a/llvm/test/CodeGen/X86/combine-bitselect.ll b/llvm/test/CodeGen/X86/combine-bitselect.ll
index 25c26d598881a..cdd2c52aa4419 100644
--- a/llvm/test/CodeGen/X86/combine-bitselect.ll
+++ b/llvm/test/CodeGen/X86/combine-bitselect.ll
@@ -283,7 +283,7 @@ define <2 x i64> @bitselect_v2i64_broadcast_rrm(<2 x i64> %a0, <2 x i64> %a1, pt
;
; AVX512VL-LABEL: bitselect_v2i64_broadcast_rrm:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpternlogq {{.*#+}} xmm0 = xmm1 ^ (mem & (xmm0 ^ xmm1))
+; AVX512VL-NEXT: vpternlogq {{.*#+}} xmm0 = xmm1 ^ (bst64 & (xmm0 ^ xmm1))
; AVX512VL-NEXT: retq
%a2 = load i64, ptr %p2
%1 = insertelement <2 x i64> undef, i64 %a2, i32 0
@@ -604,7 +604,7 @@ define <4 x i64> @bitselect_v4i64_broadcast_rrm(<4 x i64> %a0, <4 x i64> %a1, pt
;
; AVX512VL-LABEL: bitselect_v4i64_broadcast_rrm:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpternlogq {{.*#+}} ymm0 = ymm1 ^ (mem & (ymm0 ^ ymm1))
+; AVX512VL-NEXT: vpternlogq {{.*#+}} ymm0 = ymm1 ^ (bst64 & (ymm0 ^ ymm1))
; AVX512VL-NEXT: retq
%a2 = load i64, ptr %p2
%1 = insertelement <4 x i64> undef, i64 %a2, i32 0
@@ -975,7 +975,7 @@ define <8 x i64> @bitselect_v8i64_broadcast_rrm(<8 x i64> %a0, <8 x i64> %a1, pt
;
; AVX512-LABEL: bitselect_v8i64_broadcast_rrm:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 = zmm1 ^ (mem & (zmm0 ^ zmm1))
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 = zmm1 ^ (bst64 & (zmm0 ^ zmm1))
; AVX512-NEXT: retq
%a2 = load i64, ptr %p2
%1 = insertelement <8 x i64> undef, i64 %a2, i32 0
diff --git a/llvm/test/CodeGen/X86/combine-or-shuffle.ll b/llvm/test/CodeGen/X86/combine-or-shuffle.ll
index 95b5fcf8eac52..2f46bc63b0623 100644
--- a/llvm/test/CodeGen/X86/combine-or-shuffle.ll
+++ b/llvm/test/CodeGen/X86/combine-or-shuffle.ll
@@ -808,7 +808,7 @@ define <2 x i64> @or_and_v2i64(<2 x i64> %a0) {
; AVX512-LABEL: or_and_v2i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpbroadcastq {{.*#+}} xmm1 = [7,7]
-; AVX512-NEXT: vpternlogq {{.*#+}} xmm0 = xmm1 & (xmm0 | mem)
+; AVX512-NEXT: vpternlogq {{.*#+}} xmm0 = xmm1 & (xmm0 | bst64)
; AVX512-NEXT: retq
%1 = and <2 x i64> %a0, <i64 7, i64 7>
%2 = or <2 x i64> %1, <i64 3, i64 3>
diff --git a/llvm/test/CodeGen/X86/fp-round.ll b/llvm/test/CodeGen/X86/fp-round.ll
index e98fb8e374c0b..13782840d3216 100644
--- a/llvm/test/CodeGen/X86/fp-round.ll
+++ b/llvm/test/CodeGen/X86/fp-round.ll
@@ -52,7 +52,7 @@ define half @round_f16(half %h) {
; AVX512F: # %bb.0: # %entry
; AVX512F-NEXT: vcvtph2ps %xmm0, %xmm0
; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1]
-; AVX512F-NEXT: vpternlogd {{.*#+}} xmm1 = xmm1 | (xmm0 & mem)
+; AVX512F-NEXT: vpternlogd {{.*#+}} xmm1 = xmm1 | (xmm0 & bst32)
; AVX512F-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
@@ -98,7 +98,7 @@ define float @round_f32(float %x) {
; AVX512F-LABEL: round_f32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1]
-; AVX512F-NEXT: vpternlogd {{.*#+}} xmm1 = xmm1 | (xmm0 & mem)
+; AVX512F-NEXT: vpternlogd {{.*#+}} xmm1 = xmm1 | (xmm0 & bst32)
; AVX512F-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: retq
@@ -106,7 +106,7 @@ define float @round_f32(float %x) {
; AVX512FP16-LABEL: round_f32:
; AVX512FP16: ## %bb.0:
; AVX512FP16-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1]
-; AVX512FP16-NEXT: vpternlogd {{.*#+}} xmm1 = xmm1 | (xmm0 & mem)
+; AVX512FP16-NEXT: vpternlogd {{.*#+}} xmm1 = xmm1 | (xmm0 & bst32)
; AVX512FP16-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX512FP16-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0
; AVX512FP16-NEXT: retq
@@ -142,7 +142,7 @@ define double @round_f64(double %x) {
; AVX512F-LABEL: round_f64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4.9999999999999994E-1,4.9999999999999994E-1]
-; AVX512F-NEXT: vpternlogq {{.*#+}} xmm1 = xmm1 | (xmm0 & mem)
+; AVX512F-NEXT: vpternlogq {{.*#+}} xmm1 = xmm1 | (xmm0 & bst64)
; AVX512F-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vroundsd $11, %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: retq
@@ -150,7 +150,7 @@ define double @round_f64(double %x) {
; AVX512FP16-LABEL: round_f64:
; AVX512FP16: ## %bb.0:
; AVX512FP16-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4.9999999999999994E-1,4.9999999999999994E-1]
-; AVX512FP16-NEXT: vpternlogq {{.*#+}} xmm1 = xmm1 | (xmm0 & mem)
+; AVX512FP16-NEXT: vpternlogq {{.*#+}} xmm1 = xmm1 | (xmm0 & bst64)
; AVX512FP16-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX512FP16-NEXT: vroundsd $11, %xmm0, %xmm0, %xmm0
; AVX512FP16-NEXT: retq
@@ -208,7 +208,7 @@ define <4 x float> @round_v4f32(<4 x float> %x) {
; AVX512F-LABEL: round_v4f32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1]
-; AVX512F-NEXT: vpternlogd {{.*#+}} xmm1 = xmm1 | (xmm0 & mem)
+; AVX512F-NEXT: vpternlogd {{.*#+}} xmm1 = xmm1 | (xmm0 & bst32)
; AVX512F-NEXT: vaddps %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vroundps $11, %xmm0, %xmm0
; AVX512F-NEXT: retq
@@ -216,7 +216,7 @@ define <4 x float> @round_v4f32(<4 x float> %x) {
; AVX512FP16-LABEL: round_v4f32:
; AVX512FP16: ## %bb.0:
; AVX512FP16-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1]
-; AVX512FP16-NEXT: vpternlogd {{.*#+}} xmm1 = xmm1 | (xmm0 & mem)
+; AVX512FP16-NEXT: vpternlogd {{.*#+}} xmm1 = xmm1 | (xmm0 & bst32)
; AVX512FP16-NEXT: vaddps %xmm1, %xmm0, %xmm0
; AVX512FP16-NEXT: vroundps $11, %xmm0, %xmm0
; AVX512FP16-NEXT: retq
@@ -262,7 +262,7 @@ define <2 x double> @round_v2f64(<2 x double> %x) {
; AVX512F-LABEL: round_v2f64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4.9999999999999994E-1,4.9999999999999994E-1]
-; AVX512F-NEXT: vpternlogq {{.*#+}} xmm1 = xmm1 | (xmm0 & mem)
+; AVX512F-NEXT: vpternlogq {{.*#+}} xmm1 = xmm1 | (xmm0 & bst64)
; AVX512F-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vroundpd $11, %xmm0, %xmm0
; AVX512F-NEXT: retq
@@ -270,7 +270,7 @@ define <2 x double> @round_v2f64(<2 x double> %x) {
; AVX512FP16-LABEL: round_v2f64:
; AVX512FP16: ## %bb.0:
; AVX512FP16-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4.9999999999999994E-1,4.9999999999999994E-1]
-; AVX512FP16-NEXT: vpternlogq {{.*#+}} xmm1 = xmm1 | (xmm0 & mem)
+; AVX512FP16-NEXT: vpternlogq {{.*#+}} xmm1 = xmm1 | (xmm0 & bst64)
; AVX512FP16-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; AVX512FP16-NEXT: vroundpd $11, %xmm0, %xmm0
; AVX512FP16-NEXT: retq
@@ -356,7 +356,7 @@ define <8 x float> @round_v8f32(<8 x float> %x) {
; AVX512F-LABEL: round_v8f32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1]
-; AVX512F-NEXT: vpternlogd {{.*#+}} ymm1 = ymm1 | (ymm0 & mem)
+; AVX512F-NEXT: vpternlogd {{.*#+}} ymm1 = ymm1 | (ymm0 & bst32)
; AVX512F-NEXT: vaddps %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: vroundps $11, %ymm0, %ymm0
; AVX512F-NEXT: retq
@@ -364,7 +364,7 @@ define <8 x float> @round_v8f32(<8 x float> %x) {
; AVX512FP16-LABEL: round_v8f32:
; AVX512FP16: ## %bb.0:
; AVX512FP16-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1]
-; AVX512FP16-NEXT: vpternlogd {{.*#+}} ymm1 = ymm1 | (ymm0 & mem)
+; AVX512FP16-NEXT: vpternlogd {{.*#+}} ymm1 = ymm1 | (ymm0 & bst32)
; AVX512FP16-NEXT: vaddps %ymm1, %ymm0, %ymm0
; AVX512FP16-NEXT: vroundps $11, %ymm0, %ymm0
; AVX512FP16-NEXT: retq
@@ -426,7 +426,7 @@ define <4 x double> @round_v4f64(<4 x double> %x) {
; AVX512F-LABEL: round_v4f64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1]
-; AVX512F-NEXT: vpternlogq {{.*#+}} ymm1 = ymm1 | (ymm0 & mem)
+; AVX512F-NEXT: vpternlogq {{.*#+}} ymm1 = ymm1 | (ymm0 & bst64)
; AVX512F-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: vroundpd $11, %ymm0, %ymm0
; AVX512F-NEXT: retq
@@ -434,7 +434,7 @@ define <4 x double> @round_v4f64(<4 x double> %x) {
; AVX512FP16-LABEL: round_v4f64:
; AVX512FP16: ## %bb.0:
; AVX512FP16-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1]
-; AVX512FP16-NEXT: vpternlogq {{.*#+}} ymm1 = ymm1 | (ymm0 & mem)
+; AVX512FP16-NEXT: vpternlogq {{.*#+}} ymm1 = ymm1 | (ymm0 & bst64)
; AVX512FP16-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; AVX512FP16-NEXT: vroundpd $11, %ymm0, %ymm0
; AVX512FP16-NEXT: retq
@@ -582,7 +582,7 @@ define <16 x float> @round_v16f32(<16 x float> %x) {
; AVX512F-LABEL: round_v16f32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpbroadcastd {{.*#+}} zmm1 = [4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1]
-; AVX512F-NEXT: vpternlogd {{.*#+}} zmm1 = zmm1 | (zmm0 & mem)
+; AVX512F-NEXT: vpternlogd {{.*#+}} zmm1 = zmm1 | (zmm0 & bst32)
; AVX512F-NEXT: vaddps %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: vrndscaleps $11, %zmm0, %zmm0
; AVX512F-NEXT: retq
@@ -590,7 +590,7 @@ define <16 x float> @round_v16f32(<16 x float> %x) {
; AVX512FP16-LABEL: round_v16f32:
; AVX512FP16: ## %bb.0:
; AVX512FP16-NEXT: vpbroadcastd {{.*#+}} zmm1 = [4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1]
-; AVX512FP16-NEXT: vpternlogd {{.*#+}} zmm1 = zmm1 | (zmm0 & mem)
+; AVX512FP16-NEXT: vpternlogd {{.*#+}} zmm1 = zmm1 | (zmm0 & bst32)
; AVX512FP16-NEXT: vaddps %zmm1, %zmm0, %zmm0
; AVX512FP16-NEXT: vrndscaleps $11, %zmm0, %zmm0
; AVX512FP16-NEXT: retq
@@ -690,7 +690,7 @@ define <8 x double> @round_v8f64(<8 x double> %x) {
; AVX512F-LABEL: round_v8f64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpbroadcastq {{.*#+}} zmm1 = [4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1]
-; AVX512F-NEXT: vpternlogq {{.*#+}} zmm1 = zmm1 | (zmm0 & mem)
+; AVX512F-NEXT: vpternlogq {{.*#+}} zmm1 = zmm1 | (zmm0 & bst64)
; AVX512F-NEXT: vaddpd %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: vrndscalepd $11, %zmm0, %zmm0
; AVX512F-NEXT: retq
@@ -698,7 +698,7 @@ define <8 x double> @round_v8f64(<8 x double> %x) {
; AVX512FP16-LABEL: round_v8f64:
; AVX512FP16: ## %bb.0:
; AVX512FP16-NEXT: vpbroadcastq {{.*#+}} zmm1 = [4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1]
-; AVX512FP16-NEXT: vpternlogq {{.*#+}} zmm1 = zmm1 | (zmm0 & mem)
+; AVX512FP16-NEXT: vpternlogq {{.*#+}} zmm1 = zmm1 | (zmm0 & bst64)
; AVX512FP16-NEXT: vaddpd %zmm1, %zmm0, %zmm0
; AVX512FP16-NEXT: vrndscalepd $11, %zmm0, %zmm0
; AVX512FP16-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/gfni-funnel-shifts.ll b/llvm/test/CodeGen/X86/gfni-funnel-shifts.ll
index 0ca3380d188b7..c7a64434565cf 100644
--- a/llvm/test/CodeGen/X86/gfni-funnel-shifts.ll
+++ b/llvm/test/CodeGen/X86/gfni-funnel-shifts.ll
@@ -492,7 +492,7 @@ define <16 x i8> @splatconstant_fshl_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind
; GFNIAVX512: # %bb.0:
; GFNIAVX512-NEXT: vpsllw $3, %xmm0, %xmm2
; GFNIAVX512-NEXT: vpsrlw $5, %xmm1, %xmm0
-; GFNIAVX512-NEXT: vpternlogd {{.*#+}} xmm0 = xmm0 ^ (mem & (xmm0 ^ xmm2))
+; GFNIAVX512-NEXT: vpternlogd {{.*#+}} xmm0 = xmm0 ^ (bst32 & (xmm0 ^ xmm2))
; GFNIAVX512-NEXT: retq
%res = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
ret <16 x i8> %res
@@ -518,7 +518,7 @@ define <16 x i8> @splatconstant_fshr_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind
; GFNIAVX512: # %bb.0:
; GFNIAVX512-NEXT: vpaddw %xmm0, %xmm0, %xmm2
; GFNIAVX512-NEXT: vpsrlw $7, %xmm1, %xmm0
-; GFNIAVX512-NEXT: vpternlogd {{.*#+}} xmm0 = xmm0 ^ (mem & (xmm0 ^ xmm2))
+; GFNIAVX512-NEXT: vpternlogd {{.*#+}} xmm0 = xmm0 ^ (bst32 & (xmm0 ^ xmm2))
; GFNIAVX512-NEXT: retq
%res = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>)
ret <16 x i8> %res
@@ -1311,7 +1311,7 @@ define <32 x i8> @splatconstant_fshl_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind
; GFNIAVX512: # %bb.0:
; GFNIAVX512-NEXT: vpsllw $4, %ymm0, %ymm2
; GFNIAVX512-NEXT: vpsrlw $4, %ymm1, %ymm0
-; GFNIAVX512-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 ^ (mem & (ymm0 ^ ymm2))
+; GFNIAVX512-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 ^ (bst32 & (ymm0 ^ ymm2))
; GFNIAVX512-NEXT: retq
%res = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a, <32 x i8> %b, <32 x i8> <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>)
ret <32 x i8> %res
@@ -1349,7 +1349,7 @@ define <32 x i8> @splatconstant_fshr_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind
; GFNIAVX512: # %bb.0:
; GFNIAVX512-NEXT: vpsllw $2, %ymm0, %ymm2
; GFNIAVX512-NEXT: vpsrlw $6, %ymm1, %ymm0
-; GFNIAVX512-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 ^ (mem & (ymm0 ^ ymm2))
+; GFNIAVX512-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 ^ (bst32 & (ymm0 ^ ymm2))
; GFNIAVX512-NEXT: retq
%res = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a, <32 x i8> %b, <32 x i8> <i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6>)
ret <32 x i8> %res
@@ -2775,7 +2775,7 @@ define <64 x i8> @splatconstant_fshl_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind
; GFNIAVX512BW: # %bb.0:
; GFNIAVX512BW-NEXT: vpaddw %zmm0, %zmm0, %zmm2
; GFNIAVX512BW-NEXT: vpsrlw $7, %zmm1, %zmm0
-; GFNIAVX512BW-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm2))
+; GFNIAVX512BW-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm2))
; GFNIAVX512BW-NEXT: retq
%res = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a, <64 x i8> %b, <64 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
ret <64 x i8> %res
@@ -2836,7 +2836,7 @@ define <64 x i8> @splatconstant_fshr_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind
; GFNIAVX512BW: # %bb.0:
; GFNIAVX512BW-NEXT: vpsllw $6, %zmm0, %zmm2
; GFNIAVX512BW-NEXT: vpsrlw $2, %zmm1, %zmm0
-; GFNIAVX512BW-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm2))
+; GFNIAVX512BW-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm2))
; GFNIAVX512BW-NEXT: retq
%res = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a, <64 x i8> %b, <64 x i8> <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>)
ret <64 x i8> %res
diff --git a/llvm/test/CodeGen/X86/gfni-shifts.ll b/llvm/test/CodeGen/X86/gfni-shifts.ll
index 5cd1a2c76762e..2f0fe9507b33e 100644
--- a/llvm/test/CodeGen/X86/gfni-shifts.ll
+++ b/llvm/test/CodeGen/X86/gfni-shifts.ll
@@ -1261,7 +1261,7 @@ define <32 x i8> @constant_shl_v32i8(<32 x i8> %a) nounwind {
; GFNIAVX512VL-NEXT: vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1 # [1,0,4,0,16,0,64,0,128,0,32,0,8,0,2,0,1,0,4,0,16,0,64,0,128,0,32,0,8,0,2,0]
; GFNIAVX512VL-NEXT: vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [0,2,0,8,0,32,0,128,0,64,0,16,0,4,0,1,0,2,0,8,0,32,0,128,0,64,0,16,0,4,0,1]
; GFNIAVX512VL-NEXT: vpsllw $8, %ymm0, %ymm0
-; GFNIAVX512VL-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 | (ymm1 & mem)
+; GFNIAVX512VL-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 | (ymm1 & bst32)
; GFNIAVX512VL-NEXT: retq
;
; GFNIAVX512BW-LABEL: constant_shl_v32i8:
@@ -2634,7 +2634,7 @@ define <64 x i8> @constant_shl_v64i8(<64 x i8> %a) nounwind {
; GFNIAVX512VL-NEXT: vpmaddubsw %ymm3, %ymm1, %ymm1
; GFNIAVX512VL-NEXT: vpsllw $8, %ymm1, %ymm1
; GFNIAVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; GFNIAVX512VL-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 | (zmm2 & mem)
+; GFNIAVX512VL-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 | (zmm2 & bst32)
; GFNIAVX512VL-NEXT: retq
;
; GFNIAVX512BW-LABEL: constant_shl_v64i8:
@@ -2642,7 +2642,7 @@ define <64 x i8> @constant_shl_v64i8(<64 x i8> %a) nounwind {
; GFNIAVX512BW-NEXT: vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm1 # [1,0,4,0,16,0,64,0,128,0,32,0,8,0,2,0,1,0,4,0,16,0,64,0,128,0,32,0,8,0,2,0,1,0,4,0,16,0,64,0,128,0,32,0,8,0,2,0,1,0,4,0,16,0,64,0,128,0,32,0,8,0,2,0]
; GFNIAVX512BW-NEXT: vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 # [0,2,0,8,0,32,0,128,0,64,0,16,0,4,0,1,0,2,0,8,0,32,0,128,0,64,0,16,0,4,0,1,0,2,0,8,0,32,0,128,0,64,0,16,0,4,0,1,0,2,0,8,0,32,0,128,0,64,0,16,0,4,0,1]
; GFNIAVX512BW-NEXT: vpsllw $8, %zmm0, %zmm0
-; GFNIAVX512BW-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 | (zmm1 & mem)
+; GFNIAVX512BW-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 | (zmm1 & bst32)
; GFNIAVX512BW-NEXT: retq
%shift = shl <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
ret <64 x i8> %shift
diff --git a/llvm/test/CodeGen/X86/midpoint-int-vec-128.ll b/llvm/test/CodeGen/X86/midpoint-int-vec-128.ll
index ac6b7e54ca5b5..6b85207b83371 100644
--- a/llvm/test/CodeGen/X86/midpoint-int-vec-128.ll
+++ b/llvm/test/CodeGen/X86/midpoint-int-vec-128.ll
@@ -2500,7 +2500,7 @@ define <16 x i8> @vec128_i8_signed_reg_reg(<16 x i8> %a1, <16 x i8> %a2) nounwin
; AVX512VL-FALLBACK-NEXT: vpsubb %xmm2, %xmm3, %xmm2
; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %xmm2, %xmm2
; AVX512VL-FALLBACK-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1
-; AVX512VL-FALLBACK-NEXT: vpternlogd {{.*#+}} xmm2 = xmm1 ^ (xmm2 & mem)
+; AVX512VL-FALLBACK-NEXT: vpternlogd {{.*#+}} xmm2 = xmm1 ^ (xmm2 & bst32)
; AVX512VL-FALLBACK-NEXT: vpsubb %xmm1, %xmm2, %xmm1
; AVX512VL-FALLBACK-NEXT: vpaddb %xmm0, %xmm1, %xmm0
; AVX512VL-FALLBACK-NEXT: retq
@@ -2729,7 +2729,7 @@ define <16 x i8> @vec128_i8_unsigned_reg_reg(<16 x i8> %a1, <16 x i8> %a2) nounw
; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %xmm1, %xmm1
; AVX512VL-FALLBACK-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm2
; AVX512VL-FALLBACK-NEXT: vpternlogq {{.*#+}} xmm2 = ~xmm2
-; AVX512VL-FALLBACK-NEXT: vpternlogd {{.*#+}} xmm1 = xmm2 ^ (xmm1 & mem)
+; AVX512VL-FALLBACK-NEXT: vpternlogd {{.*#+}} xmm1 = xmm2 ^ (xmm1 & bst32)
; AVX512VL-FALLBACK-NEXT: vpsubb %xmm2, %xmm1, %xmm1
; AVX512VL-FALLBACK-NEXT: vpaddb %xmm0, %xmm1, %xmm0
; AVX512VL-FALLBACK-NEXT: retq
@@ -2961,7 +2961,7 @@ define <16 x i8> @vec128_i8_signed_mem_reg(ptr %a1_addr, <16 x i8> %a2) nounwind
; AVX512VL-FALLBACK-NEXT: vpsubb %xmm2, %xmm3, %xmm2
; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %xmm2, %xmm2
; AVX512VL-FALLBACK-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
-; AVX512VL-FALLBACK-NEXT: vpternlogd {{.*#+}} xmm2 = xmm0 ^ (xmm2 & mem)
+; AVX512VL-FALLBACK-NEXT: vpternlogd {{.*#+}} xmm2 = xmm0 ^ (xmm2 & bst32)
; AVX512VL-FALLBACK-NEXT: vpsubb %xmm0, %xmm2, %xmm0
; AVX512VL-FALLBACK-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; AVX512VL-FALLBACK-NEXT: retq
@@ -3192,7 +3192,7 @@ define <16 x i8> @vec128_i8_signed_reg_mem(<16 x i8> %a1, ptr %a2_addr) nounwind
; AVX512VL-FALLBACK-NEXT: vpsubb %xmm2, %xmm3, %xmm2
; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %xmm2, %xmm2
; AVX512VL-FALLBACK-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1
-; AVX512VL-FALLBACK-NEXT: vpternlogd {{.*#+}} xmm2 = xmm1 ^ (xmm2 & mem)
+; AVX512VL-FALLBACK-NEXT: vpternlogd {{.*#+}} xmm2 = xmm1 ^ (xmm2 & bst32)
; AVX512VL-FALLBACK-NEXT: vpsubb %xmm1, %xmm2, %xmm1
; AVX512VL-FALLBACK-NEXT: vpaddb %xmm0, %xmm1, %xmm0
; AVX512VL-FALLBACK-NEXT: retq
@@ -3432,7 +3432,7 @@ define <16 x i8> @vec128_i8_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind
; AVX512VL-FALLBACK-NEXT: vpsubb %xmm2, %xmm3, %xmm2
; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %xmm2, %xmm2
; AVX512VL-FALLBACK-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1
-; AVX512VL-FALLBACK-NEXT: vpternlogd {{.*#+}} xmm2 = xmm1 ^ (xmm2 & mem)
+; AVX512VL-FALLBACK-NEXT: vpternlogd {{.*#+}} xmm2 = xmm1 ^ (xmm2 & bst32)
; AVX512VL-FALLBACK-NEXT: vpsubb %xmm1, %xmm2, %xmm1
; AVX512VL-FALLBACK-NEXT: vpaddb %xmm0, %xmm1, %xmm0
; AVX512VL-FALLBACK-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/midpoint-int-vec-256.ll b/llvm/test/CodeGen/X86/midpoint-int-vec-256.ll
index a8021e3164f34..baeea72fc41cf 100644
--- a/llvm/test/CodeGen/X86/midpoint-int-vec-256.ll
+++ b/llvm/test/CodeGen/X86/midpoint-int-vec-256.ll
@@ -2016,7 +2016,7 @@ define <32 x i8> @vec256_i8_signed_reg_reg(<32 x i8> %a1, <32 x i8> %a2) nounwin
; AVX512VL-FALLBACK-NEXT: vpsubb %ymm2, %ymm3, %ymm2
; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm2, %ymm2
; AVX512VL-FALLBACK-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm1
-; AVX512VL-FALLBACK-NEXT: vpternlogd {{.*#+}} ymm2 = ymm1 ^ (ymm2 & mem)
+; AVX512VL-FALLBACK-NEXT: vpternlogd {{.*#+}} ymm2 = ymm1 ^ (ymm2 & bst32)
; AVX512VL-FALLBACK-NEXT: vpsubb %ymm1, %ymm2, %ymm1
; AVX512VL-FALLBACK-NEXT: vpaddb %ymm0, %ymm1, %ymm0
; AVX512VL-FALLBACK-NEXT: retq
@@ -2194,7 +2194,7 @@ define <32 x i8> @vec256_i8_unsigned_reg_reg(<32 x i8> %a1, <32 x i8> %a2) nounw
; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm1, %ymm1
; AVX512VL-FALLBACK-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2
; AVX512VL-FALLBACK-NEXT: vpternlogq {{.*#+}} ymm2 = ~ymm2
-; AVX512VL-FALLBACK-NEXT: vpternlogd {{.*#+}} ymm1 = ymm2 ^ (ymm1 & mem)
+; AVX512VL-FALLBACK-NEXT: vpternlogd {{.*#+}} ymm1 = ymm2 ^ (ymm1 & bst32)
; AVX512VL-FALLBACK-NEXT: vpsubb %ymm2, %ymm1, %ymm1
; AVX512VL-FALLBACK-NEXT: vpaddb %ymm0, %ymm1, %ymm0
; AVX512VL-FALLBACK-NEXT: retq
@@ -2372,7 +2372,7 @@ define <32 x i8> @vec256_i8_signed_mem_reg(ptr %a1_addr, <32 x i8> %a2) nounwind
; AVX512VL-FALLBACK-NEXT: vpsubb %ymm2, %ymm3, %ymm2
; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm2, %ymm2
; AVX512VL-FALLBACK-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
-; AVX512VL-FALLBACK-NEXT: vpternlogd {{.*#+}} ymm2 = ymm0 ^ (ymm2 & mem)
+; AVX512VL-FALLBACK-NEXT: vpternlogd {{.*#+}} ymm2 = ymm0 ^ (ymm2 & bst32)
; AVX512VL-FALLBACK-NEXT: vpsubb %ymm0, %ymm2, %ymm0
; AVX512VL-FALLBACK-NEXT: vpaddb %ymm1, %ymm0, %ymm0
; AVX512VL-FALLBACK-NEXT: retq
@@ -2550,7 +2550,7 @@ define <32 x i8> @vec256_i8_signed_reg_mem(<32 x i8> %a1, ptr %a2_addr) nounwind
; AVX512VL-FALLBACK-NEXT: vpsubb %ymm2, %ymm3, %ymm2
; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm2, %ymm2
; AVX512VL-FALLBACK-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm1
-; AVX512VL-FALLBACK-NEXT: vpternlogd {{.*#+}} ymm2 = ymm1 ^ (ymm2 & mem)
+; AVX512VL-FALLBACK-NEXT: vpternlogd {{.*#+}} ymm2 = ymm1 ^ (ymm2 & bst32)
; AVX512VL-FALLBACK-NEXT: vpsubb %ymm1, %ymm2, %ymm1
; AVX512VL-FALLBACK-NEXT: vpaddb %ymm0, %ymm1, %ymm0
; AVX512VL-FALLBACK-NEXT: retq
@@ -2733,7 +2733,7 @@ define <32 x i8> @vec256_i8_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind
; AVX512VL-FALLBACK-NEXT: vpsubb %ymm2, %ymm3, %ymm2
; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm2, %ymm2
; AVX512VL-FALLBACK-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm1
-; AVX512VL-FALLBACK-NEXT: vpternlogd {{.*#+}} ymm2 = ymm1 ^ (ymm2 & mem)
+; AVX512VL-FALLBACK-NEXT: vpternlogd {{.*#+}} ymm2 = ymm1 ^ (ymm2 & bst32)
; AVX512VL-FALLBACK-NEXT: vpsubb %ymm1, %ymm2, %ymm1
; AVX512VL-FALLBACK-NEXT: vpaddb %ymm0, %ymm1, %ymm0
; AVX512VL-FALLBACK-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/min-legal-vector-width.ll b/llvm/test/CodeGen/X86/min-legal-vector-width.ll
index 24c884211cf97..f275ffe576af4 100644
--- a/llvm/test/CodeGen/X86/min-legal-vector-width.ll
+++ b/llvm/test/CodeGen/X86/min-legal-vector-width.ll
@@ -1993,21 +1993,21 @@ define <32 x i8> @splatconstant_rotate_v32i8(<32 x i8> %a) nounwind "min-legal-v
; CHECK-SKX: # %bb.0:
; CHECK-SKX-NEXT: vpsllw $4, %ymm0, %ymm1
; CHECK-SKX-NEXT: vpsrlw $4, %ymm0, %ymm0
-; CHECK-SKX-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 ^ (mem & (ymm0 ^ ymm1))
+; CHECK-SKX-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 ^ (bst32 & (ymm0 ^ ymm1))
; CHECK-SKX-NEXT: retq
;
; CHECK-AVX512-LABEL: splatconstant_rotate_v32i8:
; CHECK-AVX512: # %bb.0:
; CHECK-AVX512-NEXT: vpsllw $4, %ymm0, %ymm1
; CHECK-AVX512-NEXT: vpsrlw $4, %ymm0, %ymm0
-; CHECK-AVX512-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 ^ (mem & (ymm0 ^ ymm1))
+; CHECK-AVX512-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 ^ (bst32 & (ymm0 ^ ymm1))
; CHECK-AVX512-NEXT: retq
;
; CHECK-VBMI1-LABEL: splatconstant_rotate_v32i8:
; CHECK-VBMI1: # %bb.0:
; CHECK-VBMI1-NEXT: vpsllw $4, %ymm0, %ymm1
; CHECK-VBMI1-NEXT: vpsrlw $4, %ymm0, %ymm0
-; CHECK-VBMI1-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 ^ (mem & (ymm0 ^ ymm1))
+; CHECK-VBMI1-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 ^ (bst32 & (ymm0 ^ ymm1))
; CHECK-VBMI1-NEXT: retq
;
; CHECK-GFNI-LABEL: splatconstant_rotate_v32i8:
@@ -2025,7 +2025,7 @@ define <32 x i8> @splatconstant_rotate_mask_v32i8(<32 x i8> %a) nounwind "min-le
; CHECK-SKX: # %bb.0:
; CHECK-SKX-NEXT: vpsllw $4, %ymm0, %ymm1
; CHECK-SKX-NEXT: vpsrlw $4, %ymm0, %ymm0
-; CHECK-SKX-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 ^ (mem & (ymm0 ^ ymm1))
+; CHECK-SKX-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 ^ (bst32 & (ymm0 ^ ymm1))
; CHECK-SKX-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
; CHECK-SKX-NEXT: retq
;
@@ -2033,7 +2033,7 @@ define <32 x i8> @splatconstant_rotate_mask_v32i8(<32 x i8> %a) nounwind "min-le
; CHECK-AVX512: # %bb.0:
; CHECK-AVX512-NEXT: vpsllw $4, %ymm0, %ymm1
; CHECK-AVX512-NEXT: vpsrlw $4, %ymm0, %ymm0
-; CHECK-AVX512-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 ^ (mem & (ymm0 ^ ymm1))
+; CHECK-AVX512-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 ^ (bst32 & (ymm0 ^ ymm1))
; CHECK-AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
; CHECK-AVX512-NEXT: retq
;
@@ -2041,7 +2041,7 @@ define <32 x i8> @splatconstant_rotate_mask_v32i8(<32 x i8> %a) nounwind "min-le
; CHECK-VBMI1: # %bb.0:
; CHECK-VBMI1-NEXT: vpsllw $4, %ymm0, %ymm1
; CHECK-VBMI1-NEXT: vpsrlw $4, %ymm0, %ymm0
-; CHECK-VBMI1-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 ^ (mem & (ymm0 ^ ymm1))
+; CHECK-VBMI1-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 ^ (bst32 & (ymm0 ^ ymm1))
; CHECK-VBMI1-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
; CHECK-VBMI1-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/pmul.ll b/llvm/test/CodeGen/X86/pmul.ll
index c7cc2acaf2627..49b51599bbe5f 100644
--- a/llvm/test/CodeGen/X86/pmul.ll
+++ b/llvm/test/CodeGen/X86/pmul.ll
@@ -832,7 +832,7 @@ define <64 x i8> @mul_v64i8c(<64 x i8> %i) nounwind {
; AVX512F-NEXT: vpmaddubsw %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vpsllw $8, %ymm1, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 | (zmm2 & mem)
+; AVX512F-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 | (zmm2 & bst32)
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: mul_v64i8c:
@@ -840,7 +840,7 @@ define <64 x i8> @mul_v64i8c(<64 x i8> %i) nounwind {
; AVX512BW-NEXT: vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm1 # [117,0,117,0,117,0,117,0,117,0,117,0,117,0,117,0,117,0,117,0,117,0,117,0,117,0,117,0,117,0,117,0,117,0,117,0,117,0,117,0,117,0,117,0,117,0,117,0,117,0,117,0,117,0,117,0,117,0,117,0,117,0,117,0]
; AVX512BW-NEXT: vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 # [0,117,0,117,0,117,0,117,0,117,0,117,0,117,0,117,0,117,0,117,0,117,0,117,0,117,0,117,0,117,0,117,0,117,0,117,0,117,0,117,0,117,0,117,0,117,0,117,0,117,0,117,0,117,0,117,0,117,0,117,0,117,0,117]
; AVX512BW-NEXT: vpsllw $8, %zmm0, %zmm0
-; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 | (zmm1 & mem)
+; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 | (zmm1 & bst32)
; AVX512BW-NEXT: retq
entry:
%A = mul <64 x i8> %i, < i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117 >
diff --git a/llvm/test/CodeGen/X86/psubus.ll b/llvm/test/CodeGen/X86/psubus.ll
index 73ee28a7fd247..727e9c93d7e1c 100644
--- a/llvm/test/CodeGen/X86/psubus.ll
+++ b/llvm/test/CodeGen/X86/psubus.ll
@@ -146,7 +146,7 @@ define <4 x i32> @ashr_xor_and_custom(<4 x i32> %x) nounwind {
; AVX512-LABEL: ashr_xor_and_custom:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsrad $31, %xmm0, %xmm1
-; AVX512-NEXT: vpternlogd {{.*#+}} xmm0 = xmm1 & (xmm0 ^ mem)
+; AVX512-NEXT: vpternlogd {{.*#+}} xmm0 = xmm1 & (xmm0 ^ bst32)
; AVX512-NEXT: retq
%signsplat = ashr <4 x i32> %x, <i32 undef, i32 31, i32 31, i32 31>
%flipsign = xor <4 x i32> %x, <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648>
@@ -187,7 +187,7 @@ define <4 x i32> @ashr_add_and_custom(<4 x i32> %x) nounwind {
; AVX512-LABEL: ashr_add_and_custom:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsrad $31, %xmm0, %xmm1
-; AVX512-NEXT: vpternlogd {{.*#+}} xmm0 = xmm1 & (xmm0 ^ mem)
+; AVX512-NEXT: vpternlogd {{.*#+}} xmm0 = xmm1 & (xmm0 ^ bst32)
; AVX512-NEXT: retq
%signsplat = ashr <4 x i32> %x, <i32 undef, i32 31, i32 31, i32 31>
%flipsign = add <4 x i32> %x, <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648>
@@ -230,7 +230,7 @@ define <4 x i32> @usubsat_custom(<4 x i32> %x) nounwind {
; AVX512-LABEL: usubsat_custom:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsrad $31, %xmm0, %xmm1
-; AVX512-NEXT: vpternlogd {{.*#+}} xmm0 = xmm1 & (xmm0 ^ mem)
+; AVX512-NEXT: vpternlogd {{.*#+}} xmm0 = xmm1 & (xmm0 ^ bst32)
; AVX512-NEXT: retq
%res = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %x, <4 x i32> <i32 2147483648, i32 2147483648, i32 2147483648, i32 undef>)
ret <4 x i32> %res
diff --git a/llvm/test/CodeGen/X86/sadd_sat_vec.ll b/llvm/test/CodeGen/X86/sadd_sat_vec.ll
index bd563f97b0ac4..fb01dd91cbcc4 100644
--- a/llvm/test/CodeGen/X86/sadd_sat_vec.ll
+++ b/llvm/test/CodeGen/X86/sadd_sat_vec.ll
@@ -567,7 +567,7 @@ define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind {
; AVX512BW-NEXT: vpaddsb %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpsrlw $4, %xmm0, %xmm0
; AVX512BW-NEXT: vpbroadcastb {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
-; AVX512BW-NEXT: vpternlogd {{.*#+}} xmm0 = xmm1 ^ (xmm0 & mem)
+; AVX512BW-NEXT: vpternlogd {{.*#+}} xmm0 = xmm1 ^ (xmm0 & bst32)
; AVX512BW-NEXT: vpsubb %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: retq
%z = call <16 x i4> @llvm.sadd.sat.v16i4(<16 x i4> %x, <16 x i4> %y)
diff --git a/llvm/test/CodeGen/X86/srem-seteq-vec-nonsplat.ll b/llvm/test/CodeGen/X86/srem-seteq-vec-nonsplat.ll
index 08d9183bd30b6..c662668db08bf 100644
--- a/llvm/test/CodeGen/X86/srem-seteq-vec-nonsplat.ll
+++ b/llvm/test/CodeGen/X86/srem-seteq-vec-nonsplat.ll
@@ -2461,7 +2461,7 @@ define <32 x i1> @pr51133(<32 x i8> %x, <32 x i8> %y) {
; CHECK-AVX512VL-NEXT: vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm2 # [197,0,27,0,1,0,1,0,223,0,205,0,161,0,171,0,171,0,183,0,61,0,127,0,9,0,41,0,1,0,161,0]
; CHECK-AVX512VL-NEXT: vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm3 # [0,205,0,241,0,1,0,163,0,223,0,183,0,1,0,239,0,103,0,171,0,1,0,183,0,0,0,183,0,1,0,221]
; CHECK-AVX512VL-NEXT: vpsllw $8, %ymm3, %ymm3
-; CHECK-AVX512VL-NEXT: vpternlogd {{.*#+}} ymm3 = ymm3 | (ymm2 & mem)
+; CHECK-AVX512VL-NEXT: vpternlogd {{.*#+}} ymm3 = ymm3 | (ymm2 & bst32)
; CHECK-AVX512VL-NEXT: vpaddb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm2
; CHECK-AVX512VL-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
; CHECK-AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [128,128,1,1,1,128,1,64,128,1,128,1,128,32,1,1]
diff --git a/llvm/test/CodeGen/X86/ssub_sat_vec.ll b/llvm/test/CodeGen/X86/ssub_sat_vec.ll
index 88df3c175ec9c..29685237a5ccc 100644
--- a/llvm/test/CodeGen/X86/ssub_sat_vec.ll
+++ b/llvm/test/CodeGen/X86/ssub_sat_vec.ll
@@ -567,7 +567,7 @@ define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind {
; AVX512BW-NEXT: vpsubsb %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpsrlw $4, %xmm0, %xmm0
; AVX512BW-NEXT: vpbroadcastb {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
-; AVX512BW-NEXT: vpternlogd {{.*#+}} xmm0 = xmm1 ^ (xmm0 & mem)
+; AVX512BW-NEXT: vpternlogd {{.*#+}} xmm0 = xmm1 ^ (xmm0 & bst32)
; AVX512BW-NEXT: vpsubb %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: retq
%z = call <16 x i4> @llvm.ssub.sat.v16i4(<16 x i4> %x, <16 x i4> %y)
@@ -601,7 +601,7 @@ define <16 x i1> @v16i1(<16 x i1> %x, <16 x i1> %y) nounwind {
;
; AVX512BW-LABEL: v16i1:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpternlogd {{.*#+}} xmm0 = xmm0 & (xmm1 ^ mem)
+; AVX512BW-NEXT: vpternlogd {{.*#+}} xmm0 = xmm0 & (xmm1 ^ bst32)
; AVX512BW-NEXT: retq
%z = call <16 x i1> @llvm.ssub.sat.v16i1(<16 x i1> %x, <16 x i1> %y)
ret <16 x i1> %z
diff --git a/llvm/test/CodeGen/X86/usub_sat_vec.ll b/llvm/test/CodeGen/X86/usub_sat_vec.ll
index 4e17ca6fbae33..00df4eb5220dd 100644
--- a/llvm/test/CodeGen/X86/usub_sat_vec.ll
+++ b/llvm/test/CodeGen/X86/usub_sat_vec.ll
@@ -543,7 +543,7 @@ define <16 x i1> @v16i1(<16 x i1> %x, <16 x i1> %y) nounwind {
;
; AVX512BW-LABEL: v16i1:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpternlogd {{.*#+}} xmm0 = xmm0 & (xmm1 ^ mem)
+; AVX512BW-NEXT: vpternlogd {{.*#+}} xmm0 = xmm0 & (xmm1 ^ bst32)
; AVX512BW-NEXT: retq
%z = call <16 x i1> @llvm.usub.sat.v16i1(<16 x i1> %x, <16 x i1> %y)
ret <16 x i1> %z
diff --git a/llvm/test/CodeGen/X86/vector-fshl-128.ll b/llvm/test/CodeGen/X86/vector-fshl-128.ll
index fd0525e6d56a2..a377144b3e12c 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-128.ll
@@ -2407,7 +2407,7 @@ define <16 x i8> @splatconstant_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y) nounwi
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsllw $4, %xmm0, %xmm2
; AVX512F-NEXT: vpsrlw $4, %xmm1, %xmm0
-; AVX512F-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm2))
+; AVX512F-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm2))
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
@@ -2416,14 +2416,14 @@ define <16 x i8> @splatconstant_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y) nounwi
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllw $4, %xmm0, %xmm2
; AVX512VL-NEXT: vpsrlw $4, %xmm1, %xmm0
-; AVX512VL-NEXT: vpternlogd {{.*#+}} xmm0 = xmm0 ^ (mem & (xmm0 ^ xmm2))
+; AVX512VL-NEXT: vpternlogd {{.*#+}} xmm0 = xmm0 ^ (bst32 & (xmm0 ^ xmm2))
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_funnnel_v16i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsllw $4, %xmm0, %xmm2
; AVX512BW-NEXT: vpsrlw $4, %xmm1, %xmm0
-; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm2))
+; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm2))
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
@@ -2432,7 +2432,7 @@ define <16 x i8> @splatconstant_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y) nounwi
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: vpsllw $4, %xmm0, %xmm2
; AVX512VBMI2-NEXT: vpsrlw $4, %xmm1, %xmm0
-; AVX512VBMI2-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm2))
+; AVX512VBMI2-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm2))
; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
@@ -2441,14 +2441,14 @@ define <16 x i8> @splatconstant_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y) nounwi
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpsllw $4, %xmm0, %xmm2
; AVX512VLBW-NEXT: vpsrlw $4, %xmm1, %xmm0
-; AVX512VLBW-NEXT: vpternlogd {{.*#+}} xmm0 = xmm0 ^ (mem & (xmm0 ^ xmm2))
+; AVX512VLBW-NEXT: vpternlogd {{.*#+}} xmm0 = xmm0 ^ (bst32 & (xmm0 ^ xmm2))
; AVX512VLBW-NEXT: retq
;
; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v16i8:
; AVX512VLVBMI2: # %bb.0:
; AVX512VLVBMI2-NEXT: vpsllw $4, %xmm0, %xmm2
; AVX512VLVBMI2-NEXT: vpsrlw $4, %xmm1, %xmm0
-; AVX512VLVBMI2-NEXT: vpternlogd {{.*#+}} xmm0 = xmm0 ^ (mem & (xmm0 ^ xmm2))
+; AVX512VLVBMI2-NEXT: vpternlogd {{.*#+}} xmm0 = xmm0 ^ (bst32 & (xmm0 ^ xmm2))
; AVX512VLVBMI2-NEXT: retq
;
; XOP-LABEL: splatconstant_funnnel_v16i8:
diff --git a/llvm/test/CodeGen/X86/vector-fshl-256.ll b/llvm/test/CodeGen/X86/vector-fshl-256.ll
index fdd0d68b89003..c98764b198a72 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-256.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-256.ll
@@ -2296,7 +2296,7 @@ define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwi
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm2
; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm0
-; AVX512F-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm2))
+; AVX512F-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm2))
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT: retq
;
@@ -2304,14 +2304,14 @@ define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwi
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm2
; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm0
-; AVX512VL-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 ^ (mem & (ymm0 ^ ymm2))
+; AVX512VL-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 ^ (bst32 & (ymm0 ^ ymm2))
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_funnnel_v32i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsllw $4, %ymm0, %ymm2
; AVX512BW-NEXT: vpsrlw $4, %ymm1, %ymm0
-; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm2))
+; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm2))
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512BW-NEXT: retq
;
@@ -2319,7 +2319,7 @@ define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwi
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: vpsllw $4, %ymm0, %ymm2
; AVX512VBMI2-NEXT: vpsrlw $4, %ymm1, %ymm0
-; AVX512VBMI2-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm2))
+; AVX512VBMI2-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm2))
; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VBMI2-NEXT: retq
;
@@ -2327,14 +2327,14 @@ define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwi
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpsllw $4, %ymm0, %ymm2
; AVX512VLBW-NEXT: vpsrlw $4, %ymm1, %ymm0
-; AVX512VLBW-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 ^ (mem & (ymm0 ^ ymm2))
+; AVX512VLBW-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 ^ (bst32 & (ymm0 ^ ymm2))
; AVX512VLBW-NEXT: retq
;
; AVX10-LABEL: splatconstant_funnnel_v32i8:
; AVX10: # %bb.0:
; AVX10-NEXT: vpsllw $4, %ymm0, %ymm2
; AVX10-NEXT: vpsrlw $4, %ymm1, %ymm0
-; AVX10-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 ^ (mem & (ymm0 ^ ymm2))
+; AVX10-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 ^ (bst32 & (ymm0 ^ ymm2))
; AVX10-NEXT: retq
;
; XOPAVX1-LABEL: splatconstant_funnnel_v32i8:
diff --git a/llvm/test/CodeGen/X86/vector-fshl-512.ll b/llvm/test/CodeGen/X86/vector-fshl-512.ll
index 1d807fa85ddc5..33ab6d2005427 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-512.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-512.ll
@@ -1124,7 +1124,7 @@ define <64 x i8> @splatconstant_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y) nounwi
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1
; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm2))
+; AVX512F-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm2))
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatconstant_funnnel_v64i8:
@@ -1137,35 +1137,35 @@ define <64 x i8> @splatconstant_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y) nounwi
; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm1
; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm1
; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512VL-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm2))
+; AVX512VL-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm2))
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_funnnel_v64i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm2
; AVX512BW-NEXT: vpsrlw $4, %zmm1, %zmm0
-; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm2))
+; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm2))
; AVX512BW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatconstant_funnnel_v64i8:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: vpsllw $4, %zmm0, %zmm2
; AVX512VBMI2-NEXT: vpsrlw $4, %zmm1, %zmm0
-; AVX512VBMI2-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm2))
+; AVX512VBMI2-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm2))
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v64i8:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm2
; AVX512VLBW-NEXT: vpsrlw $4, %zmm1, %zmm0
-; AVX512VLBW-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm2))
+; AVX512VLBW-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm2))
; AVX512VLBW-NEXT: retq
;
; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v64i8:
; AVX512VLVBMI2: # %bb.0:
; AVX512VLVBMI2-NEXT: vpsllw $4, %zmm0, %zmm2
; AVX512VLVBMI2-NEXT: vpsrlw $4, %zmm1, %zmm0
-; AVX512VLVBMI2-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm2))
+; AVX512VLVBMI2-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm2))
; AVX512VLVBMI2-NEXT: retq
%res = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>)
ret <64 x i8> %res
diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll
index b763b7bac2432..2504d1739c8ab 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll
@@ -1859,7 +1859,7 @@ define <16 x i8> @splatconstant_funnnel_v16i8(<16 x i8> %x) nounwind {
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsllw $4, %xmm0, %xmm1
; AVX512F-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX512F-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm1))
+; AVX512F-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm1))
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
@@ -1868,14 +1868,14 @@ define <16 x i8> @splatconstant_funnnel_v16i8(<16 x i8> %x) nounwind {
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllw $4, %xmm0, %xmm1
; AVX512VL-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX512VL-NEXT: vpternlogd {{.*#+}} xmm0 = xmm0 ^ (mem & (xmm0 ^ xmm1))
+; AVX512VL-NEXT: vpternlogd {{.*#+}} xmm0 = xmm0 ^ (bst32 & (xmm0 ^ xmm1))
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_funnnel_v16i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsllw $4, %xmm0, %xmm1
; AVX512BW-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm1))
+; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm1))
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
@@ -1884,14 +1884,14 @@ define <16 x i8> @splatconstant_funnnel_v16i8(<16 x i8> %x) nounwind {
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpsllw $4, %xmm0, %xmm1
; AVX512VLBW-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX512VLBW-NEXT: vpternlogd {{.*#+}} xmm0 = xmm0 ^ (mem & (xmm0 ^ xmm1))
+; AVX512VLBW-NEXT: vpternlogd {{.*#+}} xmm0 = xmm0 ^ (bst32 & (xmm0 ^ xmm1))
; AVX512VLBW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatconstant_funnnel_v16i8:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: vpsllw $4, %xmm0, %xmm1
; AVX512VBMI2-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX512VBMI2-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm1))
+; AVX512VBMI2-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm1))
; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
@@ -1900,7 +1900,7 @@ define <16 x i8> @splatconstant_funnnel_v16i8(<16 x i8> %x) nounwind {
; AVX512VLVBMI2: # %bb.0:
; AVX512VLVBMI2-NEXT: vpsllw $4, %xmm0, %xmm1
; AVX512VLVBMI2-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX512VLVBMI2-NEXT: vpternlogd {{.*#+}} xmm0 = xmm0 ^ (mem & (xmm0 ^ xmm1))
+; AVX512VLVBMI2-NEXT: vpternlogd {{.*#+}} xmm0 = xmm0 ^ (bst32 & (xmm0 ^ xmm1))
; AVX512VLVBMI2-NEXT: retq
;
; XOP-LABEL: splatconstant_funnnel_v16i8:
diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll
index 9e872cc6d74a9..84b5a5e403140 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll
@@ -443,12 +443,12 @@ define <32 x i8> @var_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind {
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm2
; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm3
-; AVX512F-NEXT: vpternlogd {{.*#+}} zmm3 = zmm3 ^ (mem & (zmm3 ^ zmm2))
+; AVX512F-NEXT: vpternlogd {{.*#+}} zmm3 = zmm3 ^ (bst32 & (zmm3 ^ zmm2))
; AVX512F-NEXT: vpsllw $5, %ymm1, %ymm1
; AVX512F-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX512F-NEXT: vpsllw $2, %ymm0, %ymm2
; AVX512F-NEXT: vpsrlw $6, %ymm0, %ymm3
-; AVX512F-NEXT: vpternlogd {{.*#+}} zmm3 = zmm3 ^ (mem & (zmm3 ^ zmm2))
+; AVX512F-NEXT: vpternlogd {{.*#+}} zmm3 = zmm3 ^ (bst32 & (zmm3 ^ zmm2))
; AVX512F-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX512F-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX512F-NEXT: vpsrlw $7, %ymm0, %ymm2
@@ -463,17 +463,17 @@ define <32 x i8> @var_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind {
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm2
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm3
-; AVX512VL-NEXT: vpternlogd {{.*#+}} ymm3 = ymm3 ^ (mem & (ymm3 ^ ymm2))
+; AVX512VL-NEXT: vpternlogd {{.*#+}} ymm3 = ymm3 ^ (bst32 & (ymm3 ^ ymm2))
; AVX512VL-NEXT: vpsllw $5, %ymm1, %ymm1
; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: vpsllw $2, %ymm0, %ymm2
; AVX512VL-NEXT: vpsrlw $6, %ymm0, %ymm3
-; AVX512VL-NEXT: vpternlogd {{.*#+}} ymm3 = ymm3 ^ (mem & (ymm3 ^ ymm2))
+; AVX512VL-NEXT: vpternlogd {{.*#+}} ymm3 = ymm3 ^ (bst32 & (ymm3 ^ ymm2))
; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: vpsrlw $7, %ymm0, %ymm2
; AVX512VL-NEXT: vpaddb %ymm0, %ymm0, %ymm3
-; AVX512VL-NEXT: vpternlogd {{.*#+}} ymm3 = ymm3 | (ymm2 & mem)
+; AVX512VL-NEXT: vpternlogd {{.*#+}} ymm3 = ymm3 | (ymm2 & bst32)
; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: retq
@@ -1641,7 +1641,7 @@ define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x) nounwind {
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm1
; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0
-; AVX512F-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm1))
+; AVX512F-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm1))
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT: retq
;
@@ -1649,14 +1649,14 @@ define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x) nounwind {
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm1
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0
-; AVX512VL-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 ^ (mem & (ymm0 ^ ymm1))
+; AVX512VL-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 ^ (bst32 & (ymm0 ^ ymm1))
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_funnnel_v32i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsllw $4, %ymm0, %ymm1
; AVX512BW-NEXT: vpsrlw $4, %ymm0, %ymm0
-; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm1))
+; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm1))
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512BW-NEXT: retq
;
@@ -1664,14 +1664,14 @@ define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x) nounwind {
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpsllw $4, %ymm0, %ymm1
; AVX512VLBW-NEXT: vpsrlw $4, %ymm0, %ymm0
-; AVX512VLBW-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 ^ (mem & (ymm0 ^ ymm1))
+; AVX512VLBW-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 ^ (bst32 & (ymm0 ^ ymm1))
; AVX512VLBW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatconstant_funnnel_v32i8:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: vpsllw $4, %ymm0, %ymm1
; AVX512VBMI2-NEXT: vpsrlw $4, %ymm0, %ymm0
-; AVX512VBMI2-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm1))
+; AVX512VBMI2-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm1))
; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VBMI2-NEXT: retq
;
@@ -1679,7 +1679,7 @@ define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x) nounwind {
; AVX512VLVBMI2: # %bb.0:
; AVX512VLVBMI2-NEXT: vpsllw $4, %ymm0, %ymm1
; AVX512VLVBMI2-NEXT: vpsrlw $4, %ymm0, %ymm0
-; AVX512VLVBMI2-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 ^ (mem & (ymm0 ^ ymm1))
+; AVX512VLVBMI2-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 ^ (bst32 & (ymm0 ^ ymm1))
; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: splatconstant_funnnel_v32i8:
diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll
index 436fbe31f7a34..04dab3184e1ae 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll
@@ -721,7 +721,7 @@ define <64 x i8> @splatconstant_funnnel_v64i8(<64 x i8> %x) nounwind {
; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512F-NEXT: vpsrlw $4, %ymm2, %ymm2
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; AVX512F-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm1))
+; AVX512F-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm1))
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatconstant_funnnel_v64i8:
@@ -733,35 +733,35 @@ define <64 x i8> @splatconstant_funnnel_v64i8(<64 x i8> %x) nounwind {
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512VL-NEXT: vpsrlw $4, %ymm2, %ymm2
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; AVX512VL-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm1))
+; AVX512VL-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm1))
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_funnnel_v64i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm1
; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0
-; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm1))
+; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm1))
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v64i8:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm1
; AVX512VLBW-NEXT: vpsrlw $4, %zmm0, %zmm0
-; AVX512VLBW-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm1))
+; AVX512VLBW-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm1))
; AVX512VLBW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatconstant_funnnel_v64i8:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: vpsllw $4, %zmm0, %zmm1
; AVX512VBMI2-NEXT: vpsrlw $4, %zmm0, %zmm0
-; AVX512VBMI2-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm1))
+; AVX512VBMI2-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm1))
; AVX512VBMI2-NEXT: retq
;
; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v64i8:
; AVX512VLVBMI2: # %bb.0:
; AVX512VLVBMI2-NEXT: vpsllw $4, %zmm0, %zmm1
; AVX512VLVBMI2-NEXT: vpsrlw $4, %zmm0, %zmm0
-; AVX512VLVBMI2-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm1))
+; AVX512VLVBMI2-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm1))
; AVX512VLVBMI2-NEXT: retq
%res = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %x, <64 x i8> %x, <64 x i8> <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>)
ret <64 x i8> %res
diff --git a/llvm/test/CodeGen/X86/vector-fshr-128.ll b/llvm/test/CodeGen/X86/vector-fshr-128.ll
index a56b0a6351a3b..7a1ca3cb01eea 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-128.ll
@@ -2412,7 +2412,7 @@ define <16 x i8> @splatconstant_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y) nounwi
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsllw $4, %xmm0, %xmm2
; AVX512F-NEXT: vpsrlw $4, %xmm1, %xmm0
-; AVX512F-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm2))
+; AVX512F-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm2))
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
@@ -2421,14 +2421,14 @@ define <16 x i8> @splatconstant_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y) nounwi
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllw $4, %xmm0, %xmm2
; AVX512VL-NEXT: vpsrlw $4, %xmm1, %xmm0
-; AVX512VL-NEXT: vpternlogd {{.*#+}} xmm0 = xmm0 ^ (mem & (xmm0 ^ xmm2))
+; AVX512VL-NEXT: vpternlogd {{.*#+}} xmm0 = xmm0 ^ (bst32 & (xmm0 ^ xmm2))
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_funnnel_v16i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsllw $4, %xmm0, %xmm2
; AVX512BW-NEXT: vpsrlw $4, %xmm1, %xmm0
-; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm2))
+; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm2))
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
@@ -2437,7 +2437,7 @@ define <16 x i8> @splatconstant_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y) nounwi
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: vpsllw $4, %xmm0, %xmm2
; AVX512VBMI2-NEXT: vpsrlw $4, %xmm1, %xmm0
-; AVX512VBMI2-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm2))
+; AVX512VBMI2-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm2))
; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
@@ -2446,14 +2446,14 @@ define <16 x i8> @splatconstant_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y) nounwi
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpsllw $4, %xmm0, %xmm2
; AVX512VLBW-NEXT: vpsrlw $4, %xmm1, %xmm0
-; AVX512VLBW-NEXT: vpternlogd {{.*#+}} xmm0 = xmm0 ^ (mem & (xmm0 ^ xmm2))
+; AVX512VLBW-NEXT: vpternlogd {{.*#+}} xmm0 = xmm0 ^ (bst32 & (xmm0 ^ xmm2))
; AVX512VLBW-NEXT: retq
;
; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v16i8:
; AVX512VLVBMI2: # %bb.0:
; AVX512VLVBMI2-NEXT: vpsllw $4, %xmm0, %xmm2
; AVX512VLVBMI2-NEXT: vpsrlw $4, %xmm1, %xmm0
-; AVX512VLVBMI2-NEXT: vpternlogd {{.*#+}} xmm0 = xmm0 ^ (mem & (xmm0 ^ xmm2))
+; AVX512VLVBMI2-NEXT: vpternlogd {{.*#+}} xmm0 = xmm0 ^ (bst32 & (xmm0 ^ xmm2))
; AVX512VLVBMI2-NEXT: retq
;
; XOP-LABEL: splatconstant_funnnel_v16i8:
diff --git a/llvm/test/CodeGen/X86/vector-fshr-256.ll b/llvm/test/CodeGen/X86/vector-fshr-256.ll
index 0fa2c858ff000..d8023f4b886d7 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-256.ll
@@ -2096,7 +2096,7 @@ define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwi
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm2
; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm0
-; AVX512F-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm2))
+; AVX512F-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm2))
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT: retq
;
@@ -2104,14 +2104,14 @@ define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwi
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm2
; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm0
-; AVX512VL-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 ^ (mem & (ymm0 ^ ymm2))
+; AVX512VL-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 ^ (bst32 & (ymm0 ^ ymm2))
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_funnnel_v32i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsllw $4, %ymm0, %ymm2
; AVX512BW-NEXT: vpsrlw $4, %ymm1, %ymm0
-; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm2))
+; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm2))
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512BW-NEXT: retq
;
@@ -2119,7 +2119,7 @@ define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwi
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: vpsllw $4, %ymm0, %ymm2
; AVX512VBMI2-NEXT: vpsrlw $4, %ymm1, %ymm0
-; AVX512VBMI2-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm2))
+; AVX512VBMI2-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm2))
; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VBMI2-NEXT: retq
;
@@ -2127,14 +2127,14 @@ define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwi
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpsllw $4, %ymm0, %ymm2
; AVX512VLBW-NEXT: vpsrlw $4, %ymm1, %ymm0
-; AVX512VLBW-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 ^ (mem & (ymm0 ^ ymm2))
+; AVX512VLBW-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 ^ (bst32 & (ymm0 ^ ymm2))
; AVX512VLBW-NEXT: retq
;
; AVX10-LABEL: splatconstant_funnnel_v32i8:
; AVX10: # %bb.0:
; AVX10-NEXT: vpsllw $4, %ymm0, %ymm2
; AVX10-NEXT: vpsrlw $4, %ymm1, %ymm0
-; AVX10-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 ^ (mem & (ymm0 ^ ymm2))
+; AVX10-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 ^ (bst32 & (ymm0 ^ ymm2))
; AVX10-NEXT: retq
;
; XOPAVX1-LABEL: splatconstant_funnnel_v32i8:
diff --git a/llvm/test/CodeGen/X86/vector-fshr-512.ll b/llvm/test/CodeGen/X86/vector-fshr-512.ll
index 665223167fbb4..73aea49659dc9 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-512.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-512.ll
@@ -1166,7 +1166,7 @@ define <64 x i8> @splatconstant_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y) nounwi
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1
; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm2))
+; AVX512F-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm2))
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatconstant_funnnel_v64i8:
@@ -1179,35 +1179,35 @@ define <64 x i8> @splatconstant_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y) nounwi
; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm1
; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm1
; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512VL-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm2))
+; AVX512VL-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm2))
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_funnnel_v64i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm2
; AVX512BW-NEXT: vpsrlw $4, %zmm1, %zmm0
-; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm2))
+; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm2))
; AVX512BW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatconstant_funnnel_v64i8:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: vpsllw $4, %zmm0, %zmm2
; AVX512VBMI2-NEXT: vpsrlw $4, %zmm1, %zmm0
-; AVX512VBMI2-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm2))
+; AVX512VBMI2-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm2))
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v64i8:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm2
; AVX512VLBW-NEXT: vpsrlw $4, %zmm1, %zmm0
-; AVX512VLBW-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm2))
+; AVX512VLBW-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm2))
; AVX512VLBW-NEXT: retq
;
; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v64i8:
; AVX512VLVBMI2: # %bb.0:
; AVX512VLVBMI2-NEXT: vpsllw $4, %zmm0, %zmm2
; AVX512VLVBMI2-NEXT: vpsrlw $4, %zmm1, %zmm0
-; AVX512VLVBMI2-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm2))
+; AVX512VLVBMI2-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm2))
; AVX512VLVBMI2-NEXT: retq
%res = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>)
ret <64 x i8> %res
diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll
index 9ce682306f18b..54c118b466b77 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll
@@ -1928,7 +1928,7 @@ define <16 x i8> @splatconstant_funnnel_v16i8(<16 x i8> %x) nounwind {
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsllw $4, %xmm0, %xmm1
; AVX512F-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX512F-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm1))
+; AVX512F-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm1))
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
@@ -1937,14 +1937,14 @@ define <16 x i8> @splatconstant_funnnel_v16i8(<16 x i8> %x) nounwind {
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllw $4, %xmm0, %xmm1
; AVX512VL-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX512VL-NEXT: vpternlogd {{.*#+}} xmm0 = xmm0 ^ (mem & (xmm0 ^ xmm1))
+; AVX512VL-NEXT: vpternlogd {{.*#+}} xmm0 = xmm0 ^ (bst32 & (xmm0 ^ xmm1))
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_funnnel_v16i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsllw $4, %xmm0, %xmm1
; AVX512BW-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm1))
+; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm1))
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
@@ -1953,14 +1953,14 @@ define <16 x i8> @splatconstant_funnnel_v16i8(<16 x i8> %x) nounwind {
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpsllw $4, %xmm0, %xmm1
; AVX512VLBW-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX512VLBW-NEXT: vpternlogd {{.*#+}} xmm0 = xmm0 ^ (mem & (xmm0 ^ xmm1))
+; AVX512VLBW-NEXT: vpternlogd {{.*#+}} xmm0 = xmm0 ^ (bst32 & (xmm0 ^ xmm1))
; AVX512VLBW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatconstant_funnnel_v16i8:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: vpsllw $4, %xmm0, %xmm1
; AVX512VBMI2-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX512VBMI2-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm1))
+; AVX512VBMI2-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm1))
; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512VBMI2-NEXT: vzeroupper
; AVX512VBMI2-NEXT: retq
@@ -1969,7 +1969,7 @@ define <16 x i8> @splatconstant_funnnel_v16i8(<16 x i8> %x) nounwind {
; AVX512VLVBMI2: # %bb.0:
; AVX512VLVBMI2-NEXT: vpsllw $4, %xmm0, %xmm1
; AVX512VLVBMI2-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX512VLVBMI2-NEXT: vpternlogd {{.*#+}} xmm0 = xmm0 ^ (mem & (xmm0 ^ xmm1))
+; AVX512VLVBMI2-NEXT: vpternlogd {{.*#+}} xmm0 = xmm0 ^ (bst32 & (xmm0 ^ xmm1))
; AVX512VLVBMI2-NEXT: retq
;
; XOP-LABEL: splatconstant_funnnel_v16i8:
diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll
index 3d4f283260aa5..cb0d7339237b1 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll
@@ -469,17 +469,17 @@ define <32 x i8> @var_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind {
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm2
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm3
-; AVX512F-NEXT: vpternlogd {{.*#+}} zmm3 = zmm3 ^ (mem & (zmm3 ^ zmm2))
+; AVX512F-NEXT: vpternlogd {{.*#+}} zmm3 = zmm3 ^ (bst32 & (zmm3 ^ zmm2))
; AVX512F-NEXT: vpsllw $5, %ymm1, %ymm1
; AVX512F-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX512F-NEXT: vpsrlw $2, %ymm0, %ymm2
; AVX512F-NEXT: vpsllw $6, %ymm0, %ymm3
-; AVX512F-NEXT: vpternlogd {{.*#+}} zmm3 = zmm3 ^ (mem & (zmm3 ^ zmm2))
+; AVX512F-NEXT: vpternlogd {{.*#+}} zmm3 = zmm3 ^ (bst32 & (zmm3 ^ zmm2))
; AVX512F-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX512F-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX512F-NEXT: vpsrlw $1, %ymm0, %ymm2
; AVX512F-NEXT: vpsllw $7, %ymm0, %ymm3
-; AVX512F-NEXT: vpternlogd {{.*#+}} zmm3 = zmm3 ^ (mem & (zmm3 ^ zmm2))
+; AVX512F-NEXT: vpternlogd {{.*#+}} zmm3 = zmm3 ^ (bst32 & (zmm3 ^ zmm2))
; AVX512F-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX512F-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX512F-NEXT: retq
@@ -488,17 +488,17 @@ define <32 x i8> @var_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind {
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm2
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm3
-; AVX512VL-NEXT: vpternlogd {{.*#+}} ymm3 = ymm3 ^ (mem & (ymm3 ^ ymm2))
+; AVX512VL-NEXT: vpternlogd {{.*#+}} ymm3 = ymm3 ^ (bst32 & (ymm3 ^ ymm2))
; AVX512VL-NEXT: vpsllw $5, %ymm1, %ymm1
; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: vpsrlw $2, %ymm0, %ymm2
; AVX512VL-NEXT: vpsllw $6, %ymm0, %ymm3
-; AVX512VL-NEXT: vpternlogd {{.*#+}} ymm3 = ymm3 ^ (mem & (ymm3 ^ ymm2))
+; AVX512VL-NEXT: vpternlogd {{.*#+}} ymm3 = ymm3 ^ (bst32 & (ymm3 ^ ymm2))
; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: vpsrlw $1, %ymm0, %ymm2
; AVX512VL-NEXT: vpsllw $7, %ymm0, %ymm3
-; AVX512VL-NEXT: vpternlogd {{.*#+}} ymm3 = ymm3 ^ (mem & (ymm3 ^ ymm2))
+; AVX512VL-NEXT: vpternlogd {{.*#+}} ymm3 = ymm3 ^ (bst32 & (ymm3 ^ ymm2))
; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: retq
@@ -1693,7 +1693,7 @@ define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x) nounwind {
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm1
; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0
-; AVX512F-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm1))
+; AVX512F-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm1))
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT: retq
;
@@ -1701,14 +1701,14 @@ define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x) nounwind {
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm1
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0
-; AVX512VL-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 ^ (mem & (ymm0 ^ ymm1))
+; AVX512VL-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 ^ (bst32 & (ymm0 ^ ymm1))
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_funnnel_v32i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsllw $4, %ymm0, %ymm1
; AVX512BW-NEXT: vpsrlw $4, %ymm0, %ymm0
-; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm1))
+; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm1))
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512BW-NEXT: retq
;
@@ -1716,14 +1716,14 @@ define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x) nounwind {
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpsllw $4, %ymm0, %ymm1
; AVX512VLBW-NEXT: vpsrlw $4, %ymm0, %ymm0
-; AVX512VLBW-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 ^ (mem & (ymm0 ^ ymm1))
+; AVX512VLBW-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 ^ (bst32 & (ymm0 ^ ymm1))
; AVX512VLBW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatconstant_funnnel_v32i8:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: vpsllw $4, %ymm0, %ymm1
; AVX512VBMI2-NEXT: vpsrlw $4, %ymm0, %ymm0
-; AVX512VBMI2-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm1))
+; AVX512VBMI2-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm1))
; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VBMI2-NEXT: retq
;
@@ -1731,7 +1731,7 @@ define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x) nounwind {
; AVX512VLVBMI2: # %bb.0:
; AVX512VLVBMI2-NEXT: vpsllw $4, %ymm0, %ymm1
; AVX512VLVBMI2-NEXT: vpsrlw $4, %ymm0, %ymm0
-; AVX512VLVBMI2-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 ^ (mem & (ymm0 ^ ymm1))
+; AVX512VLVBMI2-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 ^ (bst32 & (ymm0 ^ ymm1))
; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: splatconstant_funnnel_v32i8:
diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll
index 11ea650e1f02d..2e058f22c620a 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll
@@ -721,7 +721,7 @@ define <64 x i8> @splatconstant_funnnel_v64i8(<64 x i8> %x) nounwind {
; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512F-NEXT: vpsrlw $4, %ymm2, %ymm2
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; AVX512F-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm1))
+; AVX512F-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm1))
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatconstant_funnnel_v64i8:
@@ -733,35 +733,35 @@ define <64 x i8> @splatconstant_funnnel_v64i8(<64 x i8> %x) nounwind {
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512VL-NEXT: vpsrlw $4, %ymm2, %ymm2
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; AVX512VL-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm1))
+; AVX512VL-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm1))
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_funnnel_v64i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm1
; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0
-; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm1))
+; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm1))
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v64i8:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm1
; AVX512VLBW-NEXT: vpsrlw $4, %zmm0, %zmm0
-; AVX512VLBW-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm1))
+; AVX512VLBW-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm1))
; AVX512VLBW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatconstant_funnnel_v64i8:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: vpsllw $4, %zmm0, %zmm1
; AVX512VBMI2-NEXT: vpsrlw $4, %zmm0, %zmm0
-; AVX512VBMI2-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm1))
+; AVX512VBMI2-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm1))
; AVX512VBMI2-NEXT: retq
;
; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v64i8:
; AVX512VLVBMI2: # %bb.0:
; AVX512VLVBMI2-NEXT: vpsllw $4, %zmm0, %zmm1
; AVX512VLVBMI2-NEXT: vpsrlw $4, %zmm0, %zmm0
-; AVX512VLVBMI2-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm1))
+; AVX512VLVBMI2-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm1))
; AVX512VLVBMI2-NEXT: retq
%res = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %x, <64 x i8> %x, <64 x i8> <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>)
ret <64 x i8> %res
diff --git a/llvm/test/CodeGen/X86/vector-idiv-sdiv-512.ll b/llvm/test/CodeGen/X86/vector-idiv-sdiv-512.ll
index 7e7f89b4b57c4..fd32ce31753ee 100644
--- a/llvm/test/CodeGen/X86/vector-idiv-sdiv-512.ll
+++ b/llvm/test/CodeGen/X86/vector-idiv-sdiv-512.ll
@@ -179,7 +179,7 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: vpsrlw $2, %zmm0, %zmm1
; AVX512BW-NEXT: vpbroadcastb {{.*#+}} zmm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
-; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm1 = zmm2 ^ (zmm1 & mem)
+; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm1 = zmm2 ^ (zmm1 & bst32)
; AVX512BW-NEXT: vpmovb2m %zmm0, %k0
; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
; AVX512BW-NEXT: vpsubb %zmm0, %zmm1, %zmm0
@@ -500,7 +500,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm1
; AVX512BW-NEXT: vpsrlw $2, %zmm1, %zmm2
; AVX512BW-NEXT: vpbroadcastb {{.*#+}} zmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
-; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm2 = zmm3 ^ (zmm2 & mem)
+; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm2 = zmm3 ^ (zmm2 & bst32)
; AVX512BW-NEXT: vpmovb2m %zmm1, %k0
; AVX512BW-NEXT: vpmovm2b %k0, %zmm1
; AVX512BW-NEXT: vpsubb %zmm1, %zmm2, %zmm1
@@ -606,7 +606,7 @@ define <64 x i8> @test_remconstant_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm2 # [7,0,9,0,11,0,13,0,15,0,17,0,19,0,21,0,23,0,25,0,27,0,29,0,31,0,33,0,35,0,37,0,38,0,36,0,34,0,32,0,30,0,28,0,26,0,24,0,22,0,20,0,18,0,16,0,14,0,12,0,10,0,8,0]
; AVX512BW-NEXT: vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 # [0,8,0,10,0,12,0,14,0,16,0,18,0,20,0,22,0,24,0,26,0,28,0,30,0,32,0,34,0,36,0,38,0,37,0,35,0,33,0,31,0,29,0,27,0,25,0,23,0,21,0,19,0,17,0,15,0,13,0,11,0,9,0,7]
; AVX512BW-NEXT: vpsllw $8, %zmm1, %zmm1
-; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm1 = zmm1 | (zmm2 & mem)
+; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm1 = zmm1 | (zmm2 & bst32)
; AVX512BW-NEXT: vpsubb %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
%res = srem <64 x i8> %a, <i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 32, i8 33, i8 34, i8 35, i8 36, i8 37, i8 38, i8 38, i8 37, i8 36, i8 35, i8 34, i8 33, i8 32, i8 31, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7>
diff --git a/llvm/test/CodeGen/X86/vector-idiv-udiv-512.ll b/llvm/test/CodeGen/X86/vector-idiv-udiv-512.ll
index fd7a4c9b8d5ad..8911fa474e26f 100644
--- a/llvm/test/CodeGen/X86/vector-idiv-udiv-512.ll
+++ b/llvm/test/CodeGen/X86/vector-idiv-udiv-512.ll
@@ -651,7 +651,7 @@ define <64 x i8> @test_remconstant_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm2 # [7,0,9,0,11,0,13,0,15,0,17,0,19,0,21,0,23,0,25,0,27,0,29,0,31,0,33,0,35,0,37,0,38,0,36,0,34,0,32,0,30,0,28,0,26,0,24,0,22,0,20,0,18,0,16,0,14,0,12,0,10,0,8,0]
; AVX512BW-NEXT: vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 # [0,8,0,10,0,12,0,14,0,16,0,18,0,20,0,22,0,24,0,26,0,28,0,30,0,32,0,34,0,36,0,38,0,37,0,35,0,33,0,31,0,29,0,27,0,25,0,23,0,21,0,19,0,17,0,15,0,13,0,11,0,9,0,7]
; AVX512BW-NEXT: vpsllw $8, %zmm1, %zmm1
-; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm1 = zmm1 | (zmm2 & mem)
+; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm1 = zmm1 | (zmm2 & bst32)
; AVX512BW-NEXT: vpsubb %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
%res = urem <64 x i8> %a, <i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 32, i8 33, i8 34, i8 35, i8 36, i8 37, i8 38, i8 38, i8 37, i8 36, i8 35, i8 34, i8 33, i8 32, i8 31, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7>
diff --git a/llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-5.ll b/llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-5.ll
index 418c987ab9a30..26a946c3d17da 100644
--- a/llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-5.ll
+++ b/llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-5.ll
@@ -1783,7 +1783,7 @@ define void @store_i16_stride5_vf16(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve
; AVX512-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,2]
; AVX512-NEXT: vpternlogq {{.*#+}} ymm0 = ymm2 ^ (ymm9 & (ymm0 ^ ymm2))
; AVX512-NEXT: vmovdqa {{.*#+}} ymm1 = [0,65535,65535,65535,65535,0,65535,65535,65535,65535,0,65535,65535,65535,65535,0]
-; AVX512-NEXT: vpternlogq {{.*#+}} ymm1 = mem ^ (ymm1 & (ymm0 ^ mem))
+; AVX512-NEXT: vpternlogq {{.*#+}} ymm1 = bst64 ^ (ymm1 & (ymm0 ^ bst64))
; AVX512-NEXT: vmovdqa %ymm1, 128(%r9)
; AVX512-NEXT: vmovdqa64 %zmm4, 64(%r9)
; AVX512-NEXT: vmovdqa64 %zmm6, (%r9)
@@ -1856,7 +1856,7 @@ define void @store_i16_stride5_vf16(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve
; AVX512-FCP-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,2]
; AVX512-FCP-NEXT: vpternlogq {{.*#+}} ymm0 = ymm2 ^ (ymm9 & (ymm0 ^ ymm2))
; AVX512-FCP-NEXT: vmovdqa {{.*#+}} ymm1 = [0,65535,65535,65535,65535,0,65535,65535,65535,65535,0,65535,65535,65535,65535,0]
-; AVX512-FCP-NEXT: vpternlogq {{.*#+}} ymm1 = mem ^ (ymm1 & (ymm0 ^ mem))
+; AVX512-FCP-NEXT: vpternlogq {{.*#+}} ymm1 = bst64 ^ (ymm1 & (ymm0 ^ bst64))
; AVX512-FCP-NEXT: vmovdqa %ymm1, 128(%r9)
; AVX512-FCP-NEXT: vmovdqa64 %zmm4, 64(%r9)
; AVX512-FCP-NEXT: vmovdqa64 %zmm6, (%r9)
@@ -1932,7 +1932,7 @@ define void @store_i16_stride5_vf16(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve
; AVX512DQ-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,2]
; AVX512DQ-NEXT: vpternlogq {{.*#+}} ymm0 = ymm2 ^ (ymm9 & (ymm0 ^ ymm2))
; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm1 = [0,65535,65535,65535,65535,0,65535,65535,65535,65535,0,65535,65535,65535,65535,0]
-; AVX512DQ-NEXT: vpternlogq {{.*#+}} ymm1 = mem ^ (ymm1 & (ymm0 ^ mem))
+; AVX512DQ-NEXT: vpternlogq {{.*#+}} ymm1 = bst64 ^ (ymm1 & (ymm0 ^ bst64))
; AVX512DQ-NEXT: vmovdqa %ymm1, 128(%r9)
; AVX512DQ-NEXT: vmovdqa64 %zmm4, 64(%r9)
; AVX512DQ-NEXT: vmovdqa64 %zmm6, (%r9)
@@ -2005,7 +2005,7 @@ define void @store_i16_stride5_vf16(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve
; AVX512DQ-FCP-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,2]
; AVX512DQ-FCP-NEXT: vpternlogq {{.*#+}} ymm0 = ymm2 ^ (ymm9 & (ymm0 ^ ymm2))
; AVX512DQ-FCP-NEXT: vmovdqa {{.*#+}} ymm1 = [0,65535,65535,65535,65535,0,65535,65535,65535,65535,0,65535,65535,65535,65535,0]
-; AVX512DQ-FCP-NEXT: vpternlogq {{.*#+}} ymm1 = mem ^ (ymm1 & (ymm0 ^ mem))
+; AVX512DQ-FCP-NEXT: vpternlogq {{.*#+}} ymm1 = bst64 ^ (ymm1 & (ymm0 ^ bst64))
; AVX512DQ-FCP-NEXT: vmovdqa %ymm1, 128(%r9)
; AVX512DQ-FCP-NEXT: vmovdqa64 %zmm4, 64(%r9)
; AVX512DQ-FCP-NEXT: vmovdqa64 %zmm6, (%r9)
diff --git a/llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-7.ll b/llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-7.ll
index 6f50d61f4d1f4..4e00e95f81e00 100644
--- a/llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-7.ll
+++ b/llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-7.ll
@@ -1388,7 +1388,7 @@ define void @store_i16_stride7_vf8(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX512-NEXT: vpermq {{.*#+}} ymm7 = ymm9[1,3,3,1]
; AVX512-NEXT: vpshufb {{.*#+}} ymm7 = ymm7[0,1,8,9],zero,zero,ymm7[u,u,u,u,u,u,u,u,2,3,18,19],zero,zero,ymm7[u,u,u,u,u,u,u,u,28,29,20,21]
; AVX512-NEXT: vmovdqa {{.*#+}} ymm8 = [65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535]
-; AVX512-NEXT: vpternlogd {{.*#+}} ymm8 = (mem & ~ymm8) | ymm7
+; AVX512-NEXT: vpternlogd {{.*#+}} ymm8 = (bst32 & ~ymm8) | ymm7
; AVX512-NEXT: vpternlogq {{.*#+}} ymm8 = ymm2 ^ (mem & (ymm8 ^ ymm2))
; AVX512-NEXT: vpsrlq $48, %xmm4, %xmm2
; AVX512-NEXT: vpunpckhqdq {{.*#+}} xmm2 = xmm3[1],xmm2[1]
@@ -1448,7 +1448,7 @@ define void @store_i16_stride7_vf8(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX512-FCP-NEXT: vpermq {{.*#+}} ymm7 = ymm9[1,3,3,1]
; AVX512-FCP-NEXT: vpshufb {{.*#+}} ymm7 = ymm7[0,1,8,9],zero,zero,ymm7[u,u,u,u,u,u,u,u,2,3,18,19],zero,zero,ymm7[u,u,u,u,u,u,u,u,28,29,20,21]
; AVX512-FCP-NEXT: vmovdqa {{.*#+}} ymm8 = [65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535]
-; AVX512-FCP-NEXT: vpternlogd {{.*#+}} ymm8 = (mem & ~ymm8) | ymm7
+; AVX512-FCP-NEXT: vpternlogd {{.*#+}} ymm8 = (bst32 & ~ymm8) | ymm7
; AVX512-FCP-NEXT: vpternlogq {{.*#+}} ymm8 = ymm6 ^ (mem & (ymm8 ^ ymm6))
; AVX512-FCP-NEXT: vpsrlq $48, %xmm3, %xmm3
; AVX512-FCP-NEXT: vpunpckhqdq {{.*#+}} xmm2 = xmm2[1],xmm3[1]
@@ -1511,7 +1511,7 @@ define void @store_i16_stride7_vf8(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX512DQ-NEXT: vpermq {{.*#+}} ymm7 = ymm9[1,3,3,1]
; AVX512DQ-NEXT: vpshufb {{.*#+}} ymm7 = ymm7[0,1,8,9],zero,zero,ymm7[u,u,u,u,u,u,u,u,2,3,18,19],zero,zero,ymm7[u,u,u,u,u,u,u,u,28,29,20,21]
; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm8 = [65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535]
-; AVX512DQ-NEXT: vpternlogd {{.*#+}} ymm8 = (mem & ~ymm8) | ymm7
+; AVX512DQ-NEXT: vpternlogd {{.*#+}} ymm8 = (bst32 & ~ymm8) | ymm7
; AVX512DQ-NEXT: vpternlogq {{.*#+}} ymm8 = ymm2 ^ (mem & (ymm8 ^ ymm2))
; AVX512DQ-NEXT: vpsrlq $48, %xmm4, %xmm2
; AVX512DQ-NEXT: vpunpckhqdq {{.*#+}} xmm2 = xmm3[1],xmm2[1]
@@ -1571,7 +1571,7 @@ define void @store_i16_stride7_vf8(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX512DQ-FCP-NEXT: vpermq {{.*#+}} ymm7 = ymm9[1,3,3,1]
; AVX512DQ-FCP-NEXT: vpshufb {{.*#+}} ymm7 = ymm7[0,1,8,9],zero,zero,ymm7[u,u,u,u,u,u,u,u,2,3,18,19],zero,zero,ymm7[u,u,u,u,u,u,u,u,28,29,20,21]
; AVX512DQ-FCP-NEXT: vmovdqa {{.*#+}} ymm8 = [65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535]
-; AVX512DQ-FCP-NEXT: vpternlogd {{.*#+}} ymm8 = (mem & ~ymm8) | ymm7
+; AVX512DQ-FCP-NEXT: vpternlogd {{.*#+}} ymm8 = (bst32 & ~ymm8) | ymm7
; AVX512DQ-FCP-NEXT: vpternlogq {{.*#+}} ymm8 = ymm6 ^ (mem & (ymm8 ^ ymm6))
; AVX512DQ-FCP-NEXT: vpsrlq $48, %xmm3, %xmm3
; AVX512DQ-FCP-NEXT: vpunpckhqdq {{.*#+}} xmm2 = xmm2[1],xmm3[1]
@@ -13076,7 +13076,7 @@ define void @store_i16_stride7_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve
; AVX512-NEXT: vinserti64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm2, %zmm16 # 32-byte Folded Reload
; AVX512-NEXT: vpternlogq {{.*#+}} zmm16 = zmm1 ^ (mem & (zmm16 ^ zmm1))
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; AVX512-NEXT: vpternlogd {{.*#+}} ymm0 = mem ^ (ymm18 & (ymm0 ^ mem))
+; AVX512-NEXT: vpternlogd {{.*#+}} ymm0 = bst32 ^ (ymm18 & (ymm0 ^ bst32))
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535]
; AVX512-NEXT: vpternlogq {{.*#+}} ymm21 = ymm21 ^ (ymm1 & (ymm21 ^ ymm0))
; AVX512-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm18 # 64-byte Reload
@@ -13752,7 +13752,7 @@ define void @store_i16_stride7_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve
; AVX512-FCP-NEXT: vpternlogq {{.*#+}} zmm5 = zmm5 ^ (zmm18 & (zmm5 ^ zmm6))
; AVX512-FCP-NEXT: vpternlogq {{.*#+}} zmm5 = zmm5 ^ (zmm0 & (zmm5 ^ zmm1))
; AVX512-FCP-NEXT: vmovdqa64 {{.*#+}} zmm0 = [65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535]
-; AVX512-FCP-NEXT: vpternlogd {{.*#+}} ymm2 = mem ^ (ymm0 & (ymm2 ^ mem))
+; AVX512-FCP-NEXT: vpternlogd {{.*#+}} ymm2 = bst32 ^ (ymm0 & (ymm2 ^ bst32))
; AVX512-FCP-NEXT: vmovdqa64 {{.*#+}} zmm0 = [0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535]
; AVX512-FCP-NEXT: vpternlogq {{.*#+}} ymm3 = ymm3 ^ (ymm0 & (ymm3 ^ ymm2))
; AVX512-FCP-NEXT: vmovdqa64 {{.*#+}} zmm6 = [65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535]
@@ -14403,7 +14403,7 @@ define void @store_i16_stride7_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve
; AVX512DQ-NEXT: vinserti64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm2, %zmm16 # 32-byte Folded Reload
; AVX512DQ-NEXT: vpternlogq {{.*#+}} zmm16 = zmm1 ^ (mem & (zmm16 ^ zmm1))
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; AVX512DQ-NEXT: vpternlogd {{.*#+}} ymm0 = mem ^ (ymm18 & (ymm0 ^ mem))
+; AVX512DQ-NEXT: vpternlogd {{.*#+}} ymm0 = bst32 ^ (ymm18 & (ymm0 ^ bst32))
; AVX512DQ-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535]
; AVX512DQ-NEXT: vpternlogq {{.*#+}} ymm21 = ymm21 ^ (ymm1 & (ymm21 ^ ymm0))
; AVX512DQ-NEXT: vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm18 # 64-byte Reload
@@ -15079,7 +15079,7 @@ define void @store_i16_stride7_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve
; AVX512DQ-FCP-NEXT: vpternlogq {{.*#+}} zmm5 = zmm5 ^ (zmm18 & (zmm5 ^ zmm6))
; AVX512DQ-FCP-NEXT: vpternlogq {{.*#+}} zmm5 = zmm5 ^ (zmm0 & (zmm5 ^ zmm1))
; AVX512DQ-FCP-NEXT: vmovdqa64 {{.*#+}} zmm0 = [65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535]
-; AVX512DQ-FCP-NEXT: vpternlogd {{.*#+}} ymm2 = mem ^ (ymm0 & (ymm2 ^ mem))
+; AVX512DQ-FCP-NEXT: vpternlogd {{.*#+}} ymm2 = bst32 ^ (ymm0 & (ymm2 ^ bst32))
; AVX512DQ-FCP-NEXT: vmovdqa64 {{.*#+}} zmm0 = [0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535]
; AVX512DQ-FCP-NEXT: vpternlogq {{.*#+}} ymm3 = ymm3 ^ (ymm0 & (ymm3 ^ ymm2))
; AVX512DQ-FCP-NEXT: vmovdqa64 {{.*#+}} zmm6 = [65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535]
diff --git a/llvm/test/CodeGen/X86/vector-rotate-128.ll b/llvm/test/CodeGen/X86/vector-rotate-128.ll
index 993e6afc0eaf3..012e6ad56c06a 100644
--- a/llvm/test/CodeGen/X86/vector-rotate-128.ll
+++ b/llvm/test/CodeGen/X86/vector-rotate-128.ll
@@ -1581,7 +1581,7 @@ define <16 x i8> @splatconstant_rotate_v16i8(<16 x i8> %a) nounwind {
; AVX512NOVLX: # %bb.0:
; AVX512NOVLX-NEXT: vpsllw $4, %xmm0, %xmm1
; AVX512NOVLX-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX512NOVLX-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm1))
+; AVX512NOVLX-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm1))
; AVX512NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512NOVLX-NEXT: vzeroupper
; AVX512NOVLX-NEXT: retq
@@ -1590,7 +1590,7 @@ define <16 x i8> @splatconstant_rotate_v16i8(<16 x i8> %a) nounwind {
; AVX512VLX: # %bb.0:
; AVX512VLX-NEXT: vpsllw $4, %xmm0, %xmm1
; AVX512VLX-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX512VLX-NEXT: vpternlogd {{.*#+}} xmm0 = xmm0 ^ (mem & (xmm0 ^ xmm1))
+; AVX512VLX-NEXT: vpternlogd {{.*#+}} xmm0 = xmm0 ^ (bst32 & (xmm0 ^ xmm1))
; AVX512VLX-NEXT: retq
;
; XOP-LABEL: splatconstant_rotate_v16i8:
@@ -1739,7 +1739,7 @@ define <8 x i16> @splatconstant_rotate_mask_v8i16(<8 x i16> %a) nounwind {
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllw $5, %xmm0, %xmm1
; AVX512VL-NEXT: vpsrlw $11, %xmm0, %xmm0
-; AVX512VL-NEXT: vpternlogd {{.*#+}} xmm0 = mem & (xmm0 | xmm1)
+; AVX512VL-NEXT: vpternlogd {{.*#+}} xmm0 = bst32 & (xmm0 | xmm1)
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_rotate_mask_v8i16:
@@ -1754,7 +1754,7 @@ define <8 x i16> @splatconstant_rotate_mask_v8i16(<8 x i16> %a) nounwind {
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpsllw $5, %xmm0, %xmm1
; AVX512VLBW-NEXT: vpsrlw $11, %xmm0, %xmm0
-; AVX512VLBW-NEXT: vpternlogd {{.*#+}} xmm0 = mem & (xmm0 | xmm1)
+; AVX512VLBW-NEXT: vpternlogd {{.*#+}} xmm0 = bst32 & (xmm0 | xmm1)
; AVX512VLBW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatconstant_rotate_mask_v8i16:
@@ -1819,7 +1819,7 @@ define <16 x i8> @splatconstant_rotate_mask_v16i8(<16 x i8> %a) nounwind {
; AVX512NOVLX: # %bb.0:
; AVX512NOVLX-NEXT: vpsllw $4, %xmm0, %xmm1
; AVX512NOVLX-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX512NOVLX-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm1))
+; AVX512NOVLX-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm1))
; AVX512NOVLX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512NOVLX-NEXT: vzeroupper
; AVX512NOVLX-NEXT: retq
@@ -1828,7 +1828,7 @@ define <16 x i8> @splatconstant_rotate_mask_v16i8(<16 x i8> %a) nounwind {
; AVX512VLX: # %bb.0:
; AVX512VLX-NEXT: vpsllw $4, %xmm0, %xmm1
; AVX512VLX-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX512VLX-NEXT: vpternlogd {{.*#+}} xmm0 = xmm0 ^ (mem & (xmm0 ^ xmm1))
+; AVX512VLX-NEXT: vpternlogd {{.*#+}} xmm0 = xmm0 ^ (bst32 & (xmm0 ^ xmm1))
; AVX512VLX-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512VLX-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-rotate-256.ll b/llvm/test/CodeGen/X86/vector-rotate-256.ll
index c2c6a5f7eba57..c9e6b2d432407 100644
--- a/llvm/test/CodeGen/X86/vector-rotate-256.ll
+++ b/llvm/test/CodeGen/X86/vector-rotate-256.ll
@@ -387,12 +387,12 @@ define <32 x i8> @var_rotate_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm2
; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm3
-; AVX512F-NEXT: vpternlogd {{.*#+}} zmm3 = zmm3 ^ (mem & (zmm3 ^ zmm2))
+; AVX512F-NEXT: vpternlogd {{.*#+}} zmm3 = zmm3 ^ (bst32 & (zmm3 ^ zmm2))
; AVX512F-NEXT: vpsllw $5, %ymm1, %ymm1
; AVX512F-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX512F-NEXT: vpsllw $2, %ymm0, %ymm2
; AVX512F-NEXT: vpsrlw $6, %ymm0, %ymm3
-; AVX512F-NEXT: vpternlogd {{.*#+}} zmm3 = zmm3 ^ (mem & (zmm3 ^ zmm2))
+; AVX512F-NEXT: vpternlogd {{.*#+}} zmm3 = zmm3 ^ (bst32 & (zmm3 ^ zmm2))
; AVX512F-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX512F-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX512F-NEXT: vpsrlw $7, %ymm0, %ymm2
@@ -407,17 +407,17 @@ define <32 x i8> @var_rotate_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm2
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm3
-; AVX512VL-NEXT: vpternlogd {{.*#+}} ymm3 = ymm3 ^ (mem & (ymm3 ^ ymm2))
+; AVX512VL-NEXT: vpternlogd {{.*#+}} ymm3 = ymm3 ^ (bst32 & (ymm3 ^ ymm2))
; AVX512VL-NEXT: vpsllw $5, %ymm1, %ymm1
; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: vpsllw $2, %ymm0, %ymm2
; AVX512VL-NEXT: vpsrlw $6, %ymm0, %ymm3
-; AVX512VL-NEXT: vpternlogd {{.*#+}} ymm3 = ymm3 ^ (mem & (ymm3 ^ ymm2))
+; AVX512VL-NEXT: vpternlogd {{.*#+}} ymm3 = ymm3 ^ (bst32 & (ymm3 ^ ymm2))
; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: vpsrlw $7, %ymm0, %ymm2
; AVX512VL-NEXT: vpaddb %ymm0, %ymm0, %ymm3
-; AVX512VL-NEXT: vpternlogd {{.*#+}} ymm3 = ymm3 | (ymm2 & mem)
+; AVX512VL-NEXT: vpternlogd {{.*#+}} ymm3 = ymm3 | (ymm2 & bst32)
; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: retq
@@ -1392,7 +1392,7 @@ define <32 x i8> @splatconstant_rotate_v32i8(<32 x i8> %a) nounwind {
; AVX512NOVLX: # %bb.0:
; AVX512NOVLX-NEXT: vpsllw $4, %ymm0, %ymm1
; AVX512NOVLX-NEXT: vpsrlw $4, %ymm0, %ymm0
-; AVX512NOVLX-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm1))
+; AVX512NOVLX-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm1))
; AVX512NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512NOVLX-NEXT: retq
;
@@ -1400,7 +1400,7 @@ define <32 x i8> @splatconstant_rotate_v32i8(<32 x i8> %a) nounwind {
; AVX512VLX: # %bb.0:
; AVX512VLX-NEXT: vpsllw $4, %ymm0, %ymm1
; AVX512VLX-NEXT: vpsrlw $4, %ymm0, %ymm0
-; AVX512VLX-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 ^ (mem & (ymm0 ^ ymm1))
+; AVX512VLX-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 ^ (bst32 & (ymm0 ^ ymm1))
; AVX512VLX-NEXT: retq
;
; XOPAVX1-LABEL: splatconstant_rotate_v32i8:
@@ -1566,7 +1566,7 @@ define <16 x i16> @splatconstant_rotate_mask_v16i16(<16 x i16> %a) nounwind {
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllw $5, %ymm0, %ymm1
; AVX512VL-NEXT: vpsrlw $11, %ymm0, %ymm0
-; AVX512VL-NEXT: vpternlogd {{.*#+}} ymm0 = mem & (ymm0 | ymm1)
+; AVX512VL-NEXT: vpternlogd {{.*#+}} ymm0 = bst32 & (ymm0 | ymm1)
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_rotate_mask_v16i16:
@@ -1581,7 +1581,7 @@ define <16 x i16> @splatconstant_rotate_mask_v16i16(<16 x i16> %a) nounwind {
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpsllw $5, %ymm0, %ymm1
; AVX512VLBW-NEXT: vpsrlw $11, %ymm0, %ymm0
-; AVX512VLBW-NEXT: vpternlogd {{.*#+}} ymm0 = mem & (ymm0 | ymm1)
+; AVX512VLBW-NEXT: vpternlogd {{.*#+}} ymm0 = bst32 & (ymm0 | ymm1)
; AVX512VLBW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatconstant_rotate_mask_v16i16:
@@ -1653,7 +1653,7 @@ define <32 x i8> @splatconstant_rotate_mask_v32i8(<32 x i8> %a) nounwind {
; AVX512NOVLX: # %bb.0:
; AVX512NOVLX-NEXT: vpsllw $4, %ymm0, %ymm1
; AVX512NOVLX-NEXT: vpsrlw $4, %ymm0, %ymm0
-; AVX512NOVLX-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm1))
+; AVX512NOVLX-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm1))
; AVX512NOVLX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX512NOVLX-NEXT: retq
;
@@ -1661,7 +1661,7 @@ define <32 x i8> @splatconstant_rotate_mask_v32i8(<32 x i8> %a) nounwind {
; AVX512VLX: # %bb.0:
; AVX512VLX-NEXT: vpsllw $4, %ymm0, %ymm1
; AVX512VLX-NEXT: vpsrlw $4, %ymm0, %ymm0
-; AVX512VLX-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 ^ (mem & (ymm0 ^ ymm1))
+; AVX512VLX-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 ^ (bst32 & (ymm0 ^ ymm1))
; AVX512VLX-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
; AVX512VLX-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-rotate-512.ll b/llvm/test/CodeGen/X86/vector-rotate-512.ll
index 8ac0b178a16df..e3c558b06a612 100644
--- a/llvm/test/CodeGen/X86/vector-rotate-512.ll
+++ b/llvm/test/CodeGen/X86/vector-rotate-512.ll
@@ -754,7 +754,7 @@ define <64 x i8> @splatconstant_rotate_v64i8(<64 x i8> %a) nounwind {
; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512F-NEXT: vpsrlw $4, %ymm2, %ymm2
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; AVX512F-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm1))
+; AVX512F-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm1))
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatconstant_rotate_v64i8:
@@ -766,35 +766,35 @@ define <64 x i8> @splatconstant_rotate_v64i8(<64 x i8> %a) nounwind {
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512VL-NEXT: vpsrlw $4, %ymm2, %ymm2
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; AVX512VL-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm1))
+; AVX512VL-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm1))
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_rotate_v64i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm1
; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0
-; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm1))
+; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm1))
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_rotate_v64i8:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm1
; AVX512VLBW-NEXT: vpsrlw $4, %zmm0, %zmm0
-; AVX512VLBW-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm1))
+; AVX512VLBW-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm1))
; AVX512VLBW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatconstant_rotate_v64i8:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: vpsllw $4, %zmm0, %zmm1
; AVX512VBMI2-NEXT: vpsrlw $4, %zmm0, %zmm0
-; AVX512VBMI2-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm1))
+; AVX512VBMI2-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm1))
; AVX512VBMI2-NEXT: retq
;
; AVX512VLVBMI2-LABEL: splatconstant_rotate_v64i8:
; AVX512VLVBMI2: # %bb.0:
; AVX512VLVBMI2-NEXT: vpsllw $4, %zmm0, %zmm1
; AVX512VLVBMI2-NEXT: vpsrlw $4, %zmm0, %zmm0
-; AVX512VLVBMI2-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm1))
+; AVX512VLVBMI2-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm1))
; AVX512VLVBMI2-NEXT: retq
%shl = shl <64 x i8> %a, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
%lshr = lshr <64 x i8> %a, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
@@ -844,7 +844,7 @@ define <32 x i16> @splatconstant_rotate_mask_v32i16(<32 x i16> %a) nounwind {
; AVX512F-NEXT: vpsrlw $11, %ymm0, %ymm0
; AVX512F-NEXT: vpsrlw $11, %ymm2, %ymm2
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; AVX512F-NEXT: vpternlogd {{.*#+}} zmm0 = mem & (zmm0 | zmm1)
+; AVX512F-NEXT: vpternlogd {{.*#+}} zmm0 = bst32 & (zmm0 | zmm1)
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatconstant_rotate_mask_v32i16:
@@ -856,21 +856,21 @@ define <32 x i16> @splatconstant_rotate_mask_v32i16(<32 x i16> %a) nounwind {
; AVX512VL-NEXT: vpsrlw $11, %ymm0, %ymm0
; AVX512VL-NEXT: vpsrlw $11, %ymm2, %ymm2
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; AVX512VL-NEXT: vpternlogd {{.*#+}} zmm0 = mem & (zmm0 | zmm1)
+; AVX512VL-NEXT: vpternlogd {{.*#+}} zmm0 = bst32 & (zmm0 | zmm1)
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_rotate_mask_v32i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsllw $5, %zmm0, %zmm1
; AVX512BW-NEXT: vpsrlw $11, %zmm0, %zmm0
-; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm0 = mem & (zmm0 | zmm1)
+; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm0 = bst32 & (zmm0 | zmm1)
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_rotate_mask_v32i16:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpsllw $5, %zmm0, %zmm1
; AVX512VLBW-NEXT: vpsrlw $11, %zmm0, %zmm0
-; AVX512VLBW-NEXT: vpternlogd {{.*#+}} zmm0 = mem & (zmm0 | zmm1)
+; AVX512VLBW-NEXT: vpternlogd {{.*#+}} zmm0 = bst32 & (zmm0 | zmm1)
; AVX512VLBW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatconstant_rotate_mask_v32i16:
@@ -902,7 +902,7 @@ define <64 x i8> @splatconstant_rotate_mask_v64i8(<64 x i8> %a) nounwind {
; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512F-NEXT: vpsrlw $4, %ymm2, %ymm2
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; AVX512F-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm1))
+; AVX512F-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm1))
; AVX512F-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
@@ -915,7 +915,7 @@ define <64 x i8> @splatconstant_rotate_mask_v64i8(<64 x i8> %a) nounwind {
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512VL-NEXT: vpsrlw $4, %ymm2, %ymm2
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; AVX512VL-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm1))
+; AVX512VL-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm1))
; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
@@ -923,7 +923,7 @@ define <64 x i8> @splatconstant_rotate_mask_v64i8(<64 x i8> %a) nounwind {
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm1
; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0
-; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm1))
+; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm1))
; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
@@ -931,7 +931,7 @@ define <64 x i8> @splatconstant_rotate_mask_v64i8(<64 x i8> %a) nounwind {
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm1
; AVX512VLBW-NEXT: vpsrlw $4, %zmm0, %zmm0
-; AVX512VLBW-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm1))
+; AVX512VLBW-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm1))
; AVX512VLBW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
;
@@ -939,7 +939,7 @@ define <64 x i8> @splatconstant_rotate_mask_v64i8(<64 x i8> %a) nounwind {
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: vpsllw $4, %zmm0, %zmm1
; AVX512VBMI2-NEXT: vpsrlw $4, %zmm0, %zmm0
-; AVX512VBMI2-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm1))
+; AVX512VBMI2-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm1))
; AVX512VBMI2-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; AVX512VBMI2-NEXT: retq
;
@@ -947,7 +947,7 @@ define <64 x i8> @splatconstant_rotate_mask_v64i8(<64 x i8> %a) nounwind {
; AVX512VLVBMI2: # %bb.0:
; AVX512VLVBMI2-NEXT: vpsllw $4, %zmm0, %zmm1
; AVX512VLVBMI2-NEXT: vpsrlw $4, %zmm0, %zmm0
-; AVX512VLVBMI2-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm1))
+; AVX512VLVBMI2-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm1))
; AVX512VLVBMI2-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; AVX512VLVBMI2-NEXT: retq
%shl = shl <64 x i8> %a, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
index 60295f1c145a1..8807600669110 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
@@ -2021,7 +2021,7 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) nounwind {
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vpsrlw $3, %xmm0, %xmm0
; AVX512DQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
-; AVX512DQVL-NEXT: vpternlogd {{.*#+}} xmm0 = xmm1 ^ (xmm0 & mem)
+; AVX512DQVL-NEXT: vpternlogd {{.*#+}} xmm0 = xmm1 ^ (xmm0 & bst32)
; AVX512DQVL-NEXT: vpsubb %xmm1, %xmm0, %xmm0
; AVX512DQVL-NEXT: retq
;
@@ -2029,7 +2029,7 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) nounwind {
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vpsrlw $3, %xmm0, %xmm0
; AVX512BWVL-NEXT: vpbroadcastb {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
-; AVX512BWVL-NEXT: vpternlogd {{.*#+}} xmm0 = xmm1 ^ (xmm0 & mem)
+; AVX512BWVL-NEXT: vpternlogd {{.*#+}} xmm0 = xmm1 ^ (xmm0 & bst32)
; AVX512BWVL-NEXT: vpsubb %xmm1, %xmm0, %xmm0
; AVX512BWVL-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
index 9f3fff34ea20c..a70ec2cf5d973 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
@@ -2295,7 +2295,7 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) nounwind {
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vpsrlw $3, %ymm0, %ymm0
; AVX512DQVL-NEXT: vpbroadcastd {{.*#+}} ymm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
-; AVX512DQVL-NEXT: vpternlogd {{.*#+}} ymm0 = ymm1 ^ (ymm0 & mem)
+; AVX512DQVL-NEXT: vpternlogd {{.*#+}} ymm0 = ymm1 ^ (ymm0 & bst32)
; AVX512DQVL-NEXT: vpsubb %ymm1, %ymm0, %ymm0
; AVX512DQVL-NEXT: retq
;
@@ -2303,7 +2303,7 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) nounwind {
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vpsrlw $3, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpbroadcastb {{.*#+}} ymm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
-; AVX512BWVL-NEXT: vpternlogd {{.*#+}} ymm0 = ymm1 ^ (ymm0 & mem)
+; AVX512BWVL-NEXT: vpternlogd {{.*#+}} ymm0 = ymm1 ^ (ymm0 & bst32)
; AVX512BWVL-NEXT: vpsubb %ymm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll
index 74dbee5e5d2ca..28f1c0c462984 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll
@@ -540,7 +540,7 @@ define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) nounwind {
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsrlw $3, %zmm0, %zmm0
; AVX512BW-NEXT: vpbroadcastb {{.*#+}} zmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
-; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm0 = zmm1 ^ (zmm0 & mem)
+; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm0 = zmm1 ^ (zmm0 & bst32)
; AVX512BW-NEXT: vpsubb %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
%shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-sub128.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-sub128.ll
index 4f8cbc07243fd..65d123793e42d 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-sub128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-sub128.ll
@@ -2344,7 +2344,7 @@ define <8 x i8> @splatconstant_shift_v8i8(<8 x i8> %a) nounwind {
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vpsrlw $3, %xmm0, %xmm0
; AVX512DQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
-; AVX512DQVL-NEXT: vpternlogd {{.*#+}} xmm0 = xmm1 ^ (xmm0 & mem)
+; AVX512DQVL-NEXT: vpternlogd {{.*#+}} xmm0 = xmm1 ^ (xmm0 & bst32)
; AVX512DQVL-NEXT: vpsubb %xmm1, %xmm0, %xmm0
; AVX512DQVL-NEXT: retq
;
@@ -2352,7 +2352,7 @@ define <8 x i8> @splatconstant_shift_v8i8(<8 x i8> %a) nounwind {
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vpsrlw $3, %xmm0, %xmm0
; AVX512BWVL-NEXT: vpbroadcastb {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
-; AVX512BWVL-NEXT: vpternlogd {{.*#+}} xmm0 = xmm1 ^ (xmm0 & mem)
+; AVX512BWVL-NEXT: vpternlogd {{.*#+}} xmm0 = xmm1 ^ (xmm0 & bst32)
; AVX512BWVL-NEXT: vpsubb %xmm1, %xmm0, %xmm0
; AVX512BWVL-NEXT: retq
;
@@ -2414,7 +2414,7 @@ define <4 x i8> @splatconstant_shift_v4i8(<4 x i8> %a) nounwind {
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vpsrlw $3, %xmm0, %xmm0
; AVX512DQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
-; AVX512DQVL-NEXT: vpternlogd {{.*#+}} xmm0 = xmm1 ^ (xmm0 & mem)
+; AVX512DQVL-NEXT: vpternlogd {{.*#+}} xmm0 = xmm1 ^ (xmm0 & bst32)
; AVX512DQVL-NEXT: vpsubb %xmm1, %xmm0, %xmm0
; AVX512DQVL-NEXT: retq
;
@@ -2422,7 +2422,7 @@ define <4 x i8> @splatconstant_shift_v4i8(<4 x i8> %a) nounwind {
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vpsrlw $3, %xmm0, %xmm0
; AVX512BWVL-NEXT: vpbroadcastb {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
-; AVX512BWVL-NEXT: vpternlogd {{.*#+}} xmm0 = xmm1 ^ (xmm0 & mem)
+; AVX512BWVL-NEXT: vpternlogd {{.*#+}} xmm0 = xmm1 ^ (xmm0 & bst32)
; AVX512BWVL-NEXT: vpsubb %xmm1, %xmm0, %xmm0
; AVX512BWVL-NEXT: retq
;
@@ -2484,7 +2484,7 @@ define <2 x i8> @splatconstant_shift_v2i8(<2 x i8> %a) nounwind {
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vpsrlw $3, %xmm0, %xmm0
; AVX512DQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
-; AVX512DQVL-NEXT: vpternlogd {{.*#+}} xmm0 = xmm1 ^ (xmm0 & mem)
+; AVX512DQVL-NEXT: vpternlogd {{.*#+}} xmm0 = xmm1 ^ (xmm0 & bst32)
; AVX512DQVL-NEXT: vpsubb %xmm1, %xmm0, %xmm0
; AVX512DQVL-NEXT: retq
;
@@ -2492,7 +2492,7 @@ define <2 x i8> @splatconstant_shift_v2i8(<2 x i8> %a) nounwind {
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vpsrlw $3, %xmm0, %xmm0
; AVX512BWVL-NEXT: vpbroadcastb {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
-; AVX512BWVL-NEXT: vpternlogd {{.*#+}} xmm0 = xmm1 ^ (xmm0 & mem)
+; AVX512BWVL-NEXT: vpternlogd {{.*#+}} xmm0 = xmm1 ^ (xmm0 & bst32)
; AVX512BWVL-NEXT: vpsubb %xmm1, %xmm0, %xmm0
; AVX512BWVL-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-256.ll b/llvm/test/CodeGen/X86/vector-shift-shl-256.ll
index 4f55f7af20f47..21880144bb5d8 100644
--- a/llvm/test/CodeGen/X86/vector-shift-shl-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-shl-256.ll
@@ -1369,7 +1369,7 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind {
; AVX512DQVL-NEXT: vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1 # [1,0,4,0,16,0,64,0,128,0,32,0,8,0,2,0,1,0,4,0,16,0,64,0,128,0,32,0,8,0,2,0]
; AVX512DQVL-NEXT: vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [0,2,0,8,0,32,0,128,0,64,0,16,0,4,0,1,0,2,0,8,0,32,0,128,0,64,0,16,0,4,0,1]
; AVX512DQVL-NEXT: vpsllw $8, %ymm0, %ymm0
-; AVX512DQVL-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 | (ymm1 & mem)
+; AVX512DQVL-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 | (ymm1 & bst32)
; AVX512DQVL-NEXT: retq
;
; AVX512BWVL-LABEL: constant_shift_v32i8:
diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-512.ll b/llvm/test/CodeGen/X86/vector-shift-shl-512.ll
index a42056be895e7..08a107a2cec8d 100644
--- a/llvm/test/CodeGen/X86/vector-shift-shl-512.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-shl-512.ll
@@ -324,7 +324,7 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) nounwind {
; AVX512DQ-NEXT: vpmaddubsw %ymm3, %ymm1, %ymm1
; AVX512DQ-NEXT: vpsllw $8, %ymm1, %ymm1
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512DQ-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 | (zmm2 & mem)
+; AVX512DQ-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 | (zmm2 & bst32)
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: constant_shift_v64i8:
@@ -332,7 +332,7 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm1 # [1,0,4,0,16,0,64,0,128,0,32,0,8,0,2,0,1,0,4,0,16,0,64,0,128,0,32,0,8,0,2,0,1,0,4,0,16,0,64,0,128,0,32,0,8,0,2,0,1,0,4,0,16,0,64,0,128,0,32,0,8,0,2,0]
; AVX512BW-NEXT: vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 # [0,2,0,8,0,32,0,128,0,64,0,16,0,4,0,1,0,2,0,8,0,32,0,128,0,64,0,16,0,4,0,1,0,2,0,8,0,32,0,128,0,64,0,16,0,4,0,1,0,2,0,8,0,32,0,128,0,64,0,16,0,4,0,1]
; AVX512BW-NEXT: vpsllw $8, %zmm0, %zmm0
-; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 | (zmm1 & mem)
+; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 | (zmm1 & bst32)
; AVX512BW-NEXT: retq
%shift = shl <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
ret <64 x i8> %shift
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll b/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll
index 07498c1233b5d..38982a2b0362a 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll
@@ -355,7 +355,7 @@ define <32 x i16> @test_mm512_mask_blend_epi16(<32 x i16> %A, <32 x i16> %W){
;
; AVX512F-LABEL: test_mm512_mask_blend_epi16:
; AVX512F: # %bb.0: # %entry
-; AVX512F-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm1))
+; AVX512F-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ (bst32 & (zmm0 ^ zmm1))
; AVX512F-NEXT: ret{{[l|q]}}
entry:
%0 = shufflevector <32 x i16> %A, <32 x i16> %W, <32 x i32> <i32 32, i32 1, i32 34, i32 3, i32 36, i32 5, i32 38, i32 7, i32 40, i32 9, i32 42, i32 11, i32 44, i32 13, i32 46, i32 15, i32 48, i32 17, i32 50, i32 19, i32 52, i32 21, i32 54, i32 23, i32 56, i32 25, i32 58, i32 27, i32 60, i32 29, i32 62, i32 31>
diff --git a/llvm/test/CodeGen/X86/vselect-pcmp.ll b/llvm/test/CodeGen/X86/vselect-pcmp.ll
index ab487ed888981..5f032f6c14d12 100644
--- a/llvm/test/CodeGen/X86/vselect-pcmp.ll
+++ b/llvm/test/CodeGen/X86/vselect-pcmp.ll
@@ -1763,7 +1763,7 @@ define <64 x i8> @PR110875(<32 x i8> %a0, <32 x i8> %a1, i64 %a2) {
; AVX512F-NEXT: vpcmpeqb %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; AVX512F-NEXT: vpternlogd {{.*#+}} zmm0 = mem ^ (zmm0 & (zmm1 ^ mem))
+; AVX512F-NEXT: vpternlogd {{.*#+}} zmm0 = bst32 ^ (zmm0 & (zmm1 ^ bst32))
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: PR110875:
@@ -1780,7 +1780,7 @@ define <64 x i8> @PR110875(<32 x i8> %a0, <32 x i8> %a1, i64 %a2) {
; AVX512VL-NEXT: vpcmpeqb %ymm3, %ymm2, %ymm2
; AVX512VL-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; AVX512VL-NEXT: vpternlogd {{.*#+}} zmm0 = mem ^ (zmm0 & (zmm1 ^ mem))
+; AVX512VL-NEXT: vpternlogd {{.*#+}} zmm0 = bst32 ^ (zmm0 & (zmm1 ^ bst32))
; AVX512VL-NEXT: retq
;
; XOP-LABEL: PR110875:
diff --git a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll
index 7ad9fb0c27170..b2a92d572a0e3 100644
--- a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll
+++ b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll
@@ -1235,7 +1235,7 @@ define void @vec256_i8_widen_to_i32_factor4_broadcast_to_v8i32_factor8(ptr %in.v
; AVX512F-NEXT: vmovdqa (%rdi), %xmm1
; AVX512F-NEXT: vpaddb (%rsi), %xmm1, %xmm1
; AVX512F-NEXT: vpbroadcastb %xmm1, %ymm1
-; AVX512F-NEXT: vpternlogd {{.*#+}} ymm1 = ymm1 ^ (mem & (ymm1 ^ ymm0))
+; AVX512F-NEXT: vpternlogd {{.*#+}} ymm1 = ymm1 ^ (bst32 & (ymm1 ^ ymm0))
; AVX512F-NEXT: vpaddb (%rdx), %ymm1, %ymm0
; AVX512F-NEXT: vmovdqa %ymm0, (%rcx)
; AVX512F-NEXT: vzeroupper
@@ -1248,7 +1248,7 @@ define void @vec256_i8_widen_to_i32_factor4_broadcast_to_v8i32_factor8(ptr %in.v
; AVX512DQ-NEXT: vmovdqa (%rdi), %xmm1
; AVX512DQ-NEXT: vpaddb (%rsi), %xmm1, %xmm1
; AVX512DQ-NEXT: vpbroadcastb %xmm1, %ymm1
-; AVX512DQ-NEXT: vpternlogd {{.*#+}} ymm1 = ymm1 ^ (mem & (ymm1 ^ ymm0))
+; AVX512DQ-NEXT: vpternlogd {{.*#+}} ymm1 = ymm1 ^ (bst32 & (ymm1 ^ ymm0))
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm1, %ymm0
; AVX512DQ-NEXT: vmovdqa %ymm0, (%rcx)
; AVX512DQ-NEXT: vzeroupper
@@ -1359,7 +1359,7 @@ define void @vec256_i8_widen_to_i64_factor8_broadcast_to_v4i64_factor4(ptr %in.v
; AVX512F-NEXT: vmovdqa (%rdi), %xmm1
; AVX512F-NEXT: vpaddb (%rsi), %xmm1, %xmm1
; AVX512F-NEXT: vpbroadcastb %xmm1, %ymm1
-; AVX512F-NEXT: vpternlogq {{.*#+}} ymm1 = ymm1 ^ (mem & (ymm1 ^ ymm0))
+; AVX512F-NEXT: vpternlogq {{.*#+}} ymm1 = ymm1 ^ (bst64 & (ymm1 ^ ymm0))
; AVX512F-NEXT: vpaddb (%rdx), %ymm1, %ymm0
; AVX512F-NEXT: vmovdqa %ymm0, (%rcx)
; AVX512F-NEXT: vzeroupper
@@ -1372,7 +1372,7 @@ define void @vec256_i8_widen_to_i64_factor8_broadcast_to_v4i64_factor4(ptr %in.v
; AVX512DQ-NEXT: vmovdqa (%rdi), %xmm1
; AVX512DQ-NEXT: vpaddb (%rsi), %xmm1, %xmm1
; AVX512DQ-NEXT: vpbroadcastb %xmm1, %ymm1
-; AVX512DQ-NEXT: vpternlogq {{.*#+}} ymm1 = ymm1 ^ (mem & (ymm1 ^ ymm0))
+; AVX512DQ-NEXT: vpternlogq {{.*#+}} ymm1 = ymm1 ^ (bst64 & (ymm1 ^ ymm0))
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm1, %ymm0
; AVX512DQ-NEXT: vmovdqa %ymm0, (%rcx)
; AVX512DQ-NEXT: vzeroupper
diff --git a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll
index 266b06a23df94..c5a6be298d91b 100644
--- a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll
+++ b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll
@@ -1020,7 +1020,7 @@ define void @vec256_i8_widen_to_i32_factor4_broadcast_to_v8i32_factor8(ptr %in.e
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa 32(%rdi), %ymm0
; AVX512F-NEXT: vpbroadcastb (%rdi), %ymm1
-; AVX512F-NEXT: vpternlogd {{.*#+}} ymm1 = ymm1 ^ (mem & (ymm1 ^ ymm0))
+; AVX512F-NEXT: vpternlogd {{.*#+}} ymm1 = ymm1 ^ (bst32 & (ymm1 ^ ymm0))
; AVX512F-NEXT: vpaddb (%rsi), %ymm1, %ymm0
; AVX512F-NEXT: vmovdqa %ymm0, (%rdx)
; AVX512F-NEXT: vzeroupper
@@ -1030,7 +1030,7 @@ define void @vec256_i8_widen_to_i32_factor4_broadcast_to_v8i32_factor8(ptr %in.e
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vmovdqa 32(%rdi), %ymm0
; AVX512DQ-NEXT: vpbroadcastb (%rdi), %ymm1
-; AVX512DQ-NEXT: vpternlogd {{.*#+}} ymm1 = ymm1 ^ (mem & (ymm1 ^ ymm0))
+; AVX512DQ-NEXT: vpternlogd {{.*#+}} ymm1 = ymm1 ^ (bst32 & (ymm1 ^ ymm0))
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm1, %ymm0
; AVX512DQ-NEXT: vmovdqa %ymm0, (%rdx)
; AVX512DQ-NEXT: vzeroupper
@@ -1116,7 +1116,7 @@ define void @vec256_i8_widen_to_i64_factor8_broadcast_to_v4i64_factor4(ptr %in.e
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa 32(%rdi), %ymm0
; AVX512F-NEXT: vpbroadcastb (%rdi), %ymm1
-; AVX512F-NEXT: vpternlogq {{.*#+}} ymm1 = ymm1 ^ (mem & (ymm1 ^ ymm0))
+; AVX512F-NEXT: vpternlogq {{.*#+}} ymm1 = ymm1 ^ (bst64 & (ymm1 ^ ymm0))
; AVX512F-NEXT: vpaddb (%rsi), %ymm1, %ymm0
; AVX512F-NEXT: vmovdqa %ymm0, (%rdx)
; AVX512F-NEXT: vzeroupper
@@ -1126,7 +1126,7 @@ define void @vec256_i8_widen_to_i64_factor8_broadcast_to_v4i64_factor4(ptr %in.e
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vmovdqa 32(%rdi), %ymm0
; AVX512DQ-NEXT: vpbroadcastb (%rdi), %ymm1
-; AVX512DQ-NEXT: vpternlogq {{.*#+}} ymm1 = ymm1 ^ (mem & (ymm1 ^ ymm0))
+; AVX512DQ-NEXT: vpternlogq {{.*#+}} ymm1 = ymm1 ^ (bst64 & (ymm1 ^ ymm0))
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm1, %ymm0
; AVX512DQ-NEXT: vmovdqa %ymm0, (%rdx)
; AVX512DQ-NEXT: vzeroupper
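(Aside, not part of the patch: the distinction the new tokens draw can be reproduced with a minimal C sketch, assuming clang or gcc at -O2 with -mavx512f; the helper names and constants below are made up for illustration. A splat constant feeding vpternlogd / vpternlogq is typically folded into an embedded broadcast operand -- {1to16} for dwords, {1to8} for qwords -- which the comment printer now renders as "bst32" / "bst64", while a genuine full-width load keeps "mem".)

#include <immintrin.h>

/* imm8 = 0x28 selects C & (A ^ B) in the ternlog truth table, so each
   helper computes mask & (a ^ b), with the mask as the operand that the
   compiler can fold into an embedded broadcast. */
__m512i mask_xor_d(__m512i a, __m512i b) {
  /* dword splat: usually folded as vpternlogd ... {1to16} => "bst32" */
  return _mm512_ternarylogic_epi32(a, b, _mm512_set1_epi32(0x0f0f0f0f), 0x28);
}

__m512i mask_xor_q(__m512i a, __m512i b) {
  /* qword splat: usually folded as vpternlogq ... {1to8} => "bst64" */
  return _mm512_ternarylogic_epi64(a, b, _mm512_set1_epi64(0x00ff00ff00ff00ffLL), 0x28);
}

(Whether the constant is folded into an embedded broadcast, and hence commented as "bst32" / "bst64", or loaded full-width and commented as "mem" depends on the compiler; the point of the patch is that the comment now makes that distinction visible.)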
From ffaedfed957ff22194c51ca268dc1ef65c7e441d Mon Sep 17 00:00:00 2001
From: Shamshura Egor <shamshuraegor at gmail.com>
Date: Wed, 11 Jun 2025 16:23:11 +0000
Subject: [PATCH 2/3] Fixed vector-idiv-sdiv-512.ll
---
llvm/test/CodeGen/X86/vector-idiv-sdiv-512.ll | 176 +++++++++---------
1 file changed, 89 insertions(+), 87 deletions(-)
diff --git a/llvm/test/CodeGen/X86/vector-idiv-sdiv-512.ll b/llvm/test/CodeGen/X86/vector-idiv-sdiv-512.ll
index a0864e1c67d7e..fd32ce31753ee 100644
--- a/llvm/test/CodeGen/X86/vector-idiv-sdiv-512.ll
+++ b/llvm/test/CodeGen/X86/vector-idiv-sdiv-512.ll
@@ -140,28 +140,28 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind {
; AVX512F-NEXT: vpsrlw $8, %ymm5, %ymm5
; AVX512F-NEXT: vpackuswb %ymm3, %ymm5, %ymm3
; AVX512F-NEXT: vpaddb %ymm1, %ymm3, %ymm1
+; AVX512F-NEXT: vpcmpgtb %ymm1, %ymm2, %ymm3
; AVX512F-NEXT: vpsrlw $2, %ymm1, %ymm1
-; AVX512F-NEXT: vpbroadcastb {{.*#+}} ymm3 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
-; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1
-; AVX512F-NEXT: vpbroadcastb {{.*#+}} ymm5 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
-; AVX512F-NEXT: vpxor %ymm5, %ymm1, %ymm1
-; AVX512F-NEXT: vpsubb %ymm5, %ymm1, %ymm1
-; AVX512F-NEXT: vpcmpgtb %ymm1, %ymm2, %ymm6
+; AVX512F-NEXT: vpbroadcastb {{.*#+}} ymm5 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
+; AVX512F-NEXT: vpand %ymm5, %ymm1, %ymm1
+; AVX512F-NEXT: vpbroadcastb {{.*#+}} ymm6 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
+; AVX512F-NEXT: vpxor %ymm6, %ymm1, %ymm1
+; AVX512F-NEXT: vpsubb %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vpsubb %ymm6, %ymm1, %ymm1
-; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm6 = ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15],ymm2[24],ymm0[24],ymm2[25],ymm0[25],ymm2[26],ymm0[26],ymm2[27],ymm0[27],ymm2[28],ymm0[28],ymm2[29],ymm0[29],ymm2[30],ymm0[30],ymm2[31],ymm0[31]
-; AVX512F-NEXT: vpmulhw %ymm4, %ymm6, %ymm6
-; AVX512F-NEXT: vpsrlw $8, %ymm6, %ymm6
+; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15],ymm2[24],ymm0[24],ymm2[25],ymm0[25],ymm2[26],ymm0[26],ymm2[27],ymm0[27],ymm2[28],ymm0[28],ymm2[29],ymm0[29],ymm2[30],ymm0[30],ymm2[31],ymm0[31]
+; AVX512F-NEXT: vpmulhw %ymm4, %ymm3, %ymm3
+; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3
; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm7 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[16],ymm0[16],ymm2[17],ymm0[17],ymm2[18],ymm0[18],ymm2[19],ymm0[19],ymm2[20],ymm0[20],ymm2[21],ymm0[21],ymm2[22],ymm0[22],ymm2[23],ymm0[23]
; AVX512F-NEXT: vpmulhw %ymm4, %ymm7, %ymm4
; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4
-; AVX512F-NEXT: vpackuswb %ymm6, %ymm4, %ymm4
-; AVX512F-NEXT: vpaddb %ymm0, %ymm4, %ymm0
-; AVX512F-NEXT: vpsrlw $2, %ymm0, %ymm0
-; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm0
-; AVX512F-NEXT: vpxor %ymm5, %ymm0, %ymm0
-; AVX512F-NEXT: vpsubb %ymm5, %ymm0, %ymm0
+; AVX512F-NEXT: vpackuswb %ymm3, %ymm4, %ymm3
+; AVX512F-NEXT: vpaddb %ymm0, %ymm3, %ymm0
; AVX512F-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm2
+; AVX512F-NEXT: vpsrlw $2, %ymm0, %ymm0
+; AVX512F-NEXT: vpand %ymm5, %ymm0, %ymm0
+; AVX512F-NEXT: vpxor %ymm6, %ymm0, %ymm0
; AVX512F-NEXT: vpsubb %ymm2, %ymm0, %ymm0
+; AVX512F-NEXT: vpsubb %ymm6, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
@@ -181,8 +181,9 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: vpbroadcastb {{.*#+}} zmm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm1 = zmm2 ^ (zmm1 & bst32)
; AVX512BW-NEXT: vpmovb2m %zmm0, %k0
-; AVX512BW-NEXT: vpmovm2b %k0, %zmm1
-; AVX512BW-NEXT: vpsubb %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
+; AVX512BW-NEXT: vpsubb %zmm0, %zmm1, %zmm0
+; AVX512BW-NEXT: vpsubb %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: retq
%res = sdiv <64 x i8> %a, <i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7>
ret <64 x i8> %res
@@ -211,13 +212,13 @@ define <64 x i8> @test_divconstant_64i8(<64 x i8> %a) nounwind {
; AVX512F-NEXT: vpsraw $8, %ymm4, %ymm4
; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [16,32,16,128,32,64,64,16,32,64,128,32,64,128,64,64]
; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4
-; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm2 = ymm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
-; AVX512F-NEXT: vpsraw $8, %ymm2, %ymm2
-; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [64,16,32,32,16,16,16,16,16,64,32,128,64,32,32,32]
-; AVX512F-NEXT: vpsrlw $8, %ymm2, %ymm2
-; AVX512F-NEXT: vpackuswb %ymm4, %ymm2, %ymm2
-; AVX512F-NEXT: vpcmpgtb %ymm2, %ymm1, %ymm4
-; AVX512F-NEXT: vpsubb %ymm4, %ymm2, %ymm2
+; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm5 = ymm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
+; AVX512F-NEXT: vpsraw $8, %ymm5, %ymm5
+; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm5, %ymm5 # [64,16,32,32,16,16,16,16,16,64,32,128,64,32,32,32]
+; AVX512F-NEXT: vpsrlw $8, %ymm5, %ymm5
+; AVX512F-NEXT: vpackuswb %ymm4, %ymm5, %ymm4
+; AVX512F-NEXT: vpcmpgtb %ymm2, %ymm1, %ymm2
+; AVX512F-NEXT: vpsubb %ymm2, %ymm4, %ymm2
; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [35072,33024,30976,14592,6912,26368,12544,47872,34048,33024,32000,30976,15104,14592,28416,6912]
; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4
@@ -230,13 +231,13 @@ define <64 x i8> @test_divconstant_64i8(<64 x i8> %a) nounwind {
; AVX512F-NEXT: vpsraw $8, %ymm3, %ymm3
; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [32,32,32,64,128,32,64,16,16,16,16,16,32,32,16,64]
; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3
-; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
-; AVX512F-NEXT: vpsraw $8, %ymm0, %ymm0
-; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [64,64,128,64,32,128,64,32,16,64,64,32,128,16,32,16]
-; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0
-; AVX512F-NEXT: vpackuswb %ymm3, %ymm0, %ymm0
-; AVX512F-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm1
-; AVX512F-NEXT: vpsubb %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm4 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
+; AVX512F-NEXT: vpsraw $8, %ymm4, %ymm4
+; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [64,64,128,64,32,128,64,32,16,64,64,32,128,16,32,16]
+; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4
+; AVX512F-NEXT: vpackuswb %ymm3, %ymm4, %ymm3
+; AVX512F-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
+; AVX512F-NEXT: vpsubb %ymm0, %ymm3, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
@@ -256,14 +257,14 @@ define <64 x i8> @test_divconstant_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: vpsraw $8, %zmm1, %zmm1
; AVX512BW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1
-; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm0 = zmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55]
-; AVX512BW-NEXT: vpsraw $8, %zmm0, %zmm0
-; AVX512BW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
-; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0
-; AVX512BW-NEXT: vpackuswb %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm2 = zmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55]
+; AVX512BW-NEXT: vpsraw $8, %zmm2, %zmm2
+; AVX512BW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2
+; AVX512BW-NEXT: vpsrlw $8, %zmm2, %zmm2
+; AVX512BW-NEXT: vpackuswb %zmm1, %zmm2, %zmm1
; AVX512BW-NEXT: vpmovb2m %zmm0, %k0
-; AVX512BW-NEXT: vpmovm2b %k0, %zmm1
-; AVX512BW-NEXT: vpsubb %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
+; AVX512BW-NEXT: vpsubb %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: retq
%res = sdiv <64 x i8> %a, <i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 32, i8 33, i8 34, i8 35, i8 36, i8 37, i8 38, i8 38, i8 37, i8 36, i8 35, i8 34, i8 33, i8 32, i8 31, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7>
ret <64 x i8> %res
@@ -440,49 +441,49 @@ define <32 x i16> @test_rem7_32i16(<32 x i16> %a) nounwind {
define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512F-LABEL: test_rem7_64i8:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2
-; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15],ymm1[24],ymm2[24],ymm1[25],ymm2[25],ymm1[26],ymm2[26],ymm1[27],ymm2[27],ymm1[28],ymm2[28],ymm1[29],ymm2[29],ymm1[30],ymm2[30],ymm1[31],ymm2[31]
+; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm2[8],ymm1[8],ymm2[9],ymm1[9],ymm2[10],ymm1[10],ymm2[11],ymm1[11],ymm2[12],ymm1[12],ymm2[13],ymm1[13],ymm2[14],ymm1[14],ymm2[15],ymm1[15],ymm2[24],ymm1[24],ymm2[25],ymm1[25],ymm2[26],ymm1[26],ymm2[27],ymm1[27],ymm2[28],ymm1[28],ymm2[29],ymm1[29],ymm2[30],ymm1[30],ymm2[31],ymm1[31]
; AVX512F-NEXT: vpbroadcastw {{.*#+}} ymm4 = [37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632]
; AVX512F-NEXT: vpmulhw %ymm4, %ymm3, %ymm3
; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3
-; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm5 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[16],ymm2[16],ymm1[17],ymm2[17],ymm1[18],ymm2[18],ymm1[19],ymm2[19],ymm1[20],ymm2[20],ymm1[21],ymm2[21],ymm1[22],ymm2[22],ymm1[23],ymm2[23]
+; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm5 = ymm2[0],ymm1[0],ymm2[1],ymm1[1],ymm2[2],ymm1[2],ymm2[3],ymm1[3],ymm2[4],ymm1[4],ymm2[5],ymm1[5],ymm2[6],ymm1[6],ymm2[7],ymm1[7],ymm2[16],ymm1[16],ymm2[17],ymm1[17],ymm2[18],ymm1[18],ymm2[19],ymm1[19],ymm2[20],ymm1[20],ymm2[21],ymm1[21],ymm2[22],ymm1[22],ymm2[23],ymm1[23]
; AVX512F-NEXT: vpmulhw %ymm4, %ymm5, %ymm5
; AVX512F-NEXT: vpsrlw $8, %ymm5, %ymm5
; AVX512F-NEXT: vpackuswb %ymm3, %ymm5, %ymm3
-; AVX512F-NEXT: vpaddb %ymm2, %ymm3, %ymm3
+; AVX512F-NEXT: vpaddb %ymm1, %ymm3, %ymm3
+; AVX512F-NEXT: vpcmpgtb %ymm3, %ymm2, %ymm5
; AVX512F-NEXT: vpsrlw $2, %ymm3, %ymm3
-; AVX512F-NEXT: vpbroadcastb {{.*#+}} ymm5 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
-; AVX512F-NEXT: vpand %ymm5, %ymm3, %ymm3
-; AVX512F-NEXT: vpbroadcastb {{.*#+}} ymm6 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
-; AVX512F-NEXT: vpxor %ymm6, %ymm3, %ymm3
-; AVX512F-NEXT: vpsubb %ymm6, %ymm3, %ymm3
-; AVX512F-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm7
+; AVX512F-NEXT: vpbroadcastb {{.*#+}} ymm6 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
+; AVX512F-NEXT: vpand %ymm6, %ymm3, %ymm3
+; AVX512F-NEXT: vpbroadcastb {{.*#+}} ymm7 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
+; AVX512F-NEXT: vpxor %ymm7, %ymm3, %ymm3
+; AVX512F-NEXT: vpsubb %ymm5, %ymm3, %ymm3
; AVX512F-NEXT: vpsubb %ymm7, %ymm3, %ymm3
-; AVX512F-NEXT: vpsllw $3, %ymm3, %ymm7
+; AVX512F-NEXT: vpsllw $3, %ymm3, %ymm5
; AVX512F-NEXT: vpbroadcastb {{.*#+}} ymm8 = [248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248]
-; AVX512F-NEXT: vpand %ymm7, %ymm8, %ymm7
-; AVX512F-NEXT: vpsubb %ymm7, %ymm3, %ymm3
-; AVX512F-NEXT: vpaddb %ymm3, %ymm2, %ymm2
-; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
+; AVX512F-NEXT: vpand %ymm5, %ymm8, %ymm5
+; AVX512F-NEXT: vpsubb %ymm5, %ymm3, %ymm3
+; AVX512F-NEXT: vpaddb %ymm3, %ymm1, %ymm1
+; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15],ymm2[24],ymm0[24],ymm2[25],ymm0[25],ymm2[26],ymm0[26],ymm2[27],ymm0[27],ymm2[28],ymm0[28],ymm2[29],ymm0[29],ymm2[30],ymm0[30],ymm2[31],ymm0[31]
; AVX512F-NEXT: vpmulhw %ymm4, %ymm3, %ymm3
; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3
-; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm7 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
-; AVX512F-NEXT: vpmulhw %ymm4, %ymm7, %ymm4
+; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm5 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[16],ymm0[16],ymm2[17],ymm0[17],ymm2[18],ymm0[18],ymm2[19],ymm0[19],ymm2[20],ymm0[20],ymm2[21],ymm0[21],ymm2[22],ymm0[22],ymm2[23],ymm0[23]
+; AVX512F-NEXT: vpmulhw %ymm4, %ymm5, %ymm4
; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4
; AVX512F-NEXT: vpackuswb %ymm3, %ymm4, %ymm3
; AVX512F-NEXT: vpaddb %ymm0, %ymm3, %ymm3
+; AVX512F-NEXT: vpcmpgtb %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vpsrlw $2, %ymm3, %ymm3
-; AVX512F-NEXT: vpand %ymm5, %ymm3, %ymm3
-; AVX512F-NEXT: vpxor %ymm6, %ymm3, %ymm3
-; AVX512F-NEXT: vpsubb %ymm6, %ymm3, %ymm3
-; AVX512F-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm1
-; AVX512F-NEXT: vpsubb %ymm1, %ymm3, %ymm1
-; AVX512F-NEXT: vpsllw $3, %ymm1, %ymm3
+; AVX512F-NEXT: vpand %ymm6, %ymm3, %ymm3
+; AVX512F-NEXT: vpxor %ymm7, %ymm3, %ymm3
+; AVX512F-NEXT: vpsubb %ymm2, %ymm3, %ymm2
+; AVX512F-NEXT: vpsubb %ymm7, %ymm2, %ymm2
+; AVX512F-NEXT: vpsllw $3, %ymm2, %ymm3
; AVX512F-NEXT: vpand %ymm3, %ymm8, %ymm3
-; AVX512F-NEXT: vpsubb %ymm3, %ymm1, %ymm1
-; AVX512F-NEXT: vpaddb %ymm1, %ymm0, %ymm0
-; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512F-NEXT: vpsubb %ymm3, %ymm2, %ymm2
+; AVX512F-NEXT: vpaddb %ymm2, %ymm0, %ymm0
+; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: test_rem7_64i8:
@@ -501,8 +502,9 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: vpbroadcastb {{.*#+}} zmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm2 = zmm3 ^ (zmm2 & bst32)
; AVX512BW-NEXT: vpmovb2m %zmm1, %k0
-; AVX512BW-NEXT: vpmovm2b %k0, %zmm2
-; AVX512BW-NEXT: vpsubb %zmm2, %zmm1, %zmm1
+; AVX512BW-NEXT: vpmovm2b %k0, %zmm1
+; AVX512BW-NEXT: vpsubb %zmm1, %zmm2, %zmm1
+; AVX512BW-NEXT: vpsubb %zmm3, %zmm1, %zmm1
; AVX512BW-NEXT: vpsllw $3, %zmm1, %zmm2
; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm2, %zmm2
; AVX512BW-NEXT: vpsubb %zmm2, %zmm1, %zmm1
@@ -535,13 +537,13 @@ define <64 x i8> @test_remconstant_64i8(<64 x i8> %a) nounwind {
; AVX512F-NEXT: vpsraw $8, %ymm5, %ymm5
; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm5, %ymm5 # [16,32,16,128,32,64,64,16,32,64,128,32,64,128,64,64]
; AVX512F-NEXT: vpsrlw $8, %ymm5, %ymm5
-; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm3 = ymm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
-; AVX512F-NEXT: vpsraw $8, %ymm3, %ymm3
-; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [64,16,32,32,16,16,16,16,16,64,32,128,64,32,32,32]
-; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3
-; AVX512F-NEXT: vpackuswb %ymm5, %ymm3, %ymm3
-; AVX512F-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm5
-; AVX512F-NEXT: vpsubb %ymm5, %ymm3, %ymm3
+; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm6 = ymm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
+; AVX512F-NEXT: vpsraw $8, %ymm6, %ymm6
+; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm6, %ymm6 # [64,16,32,32,16,16,16,16,16,64,32,128,64,32,32,32]
+; AVX512F-NEXT: vpsrlw $8, %ymm6, %ymm6
+; AVX512F-NEXT: vpackuswb %ymm5, %ymm6, %ymm5
+; AVX512F-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm3
+; AVX512F-NEXT: vpsubb %ymm3, %ymm5, %ymm3
; AVX512F-NEXT: vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm5 # [38,0,36,0,34,0,32,0,30,0,28,0,26,0,24,0,22,0,20,0,18,0,16,0,14,0,12,0,10,0,8,0]
; AVX512F-NEXT: vpbroadcastw {{.*#+}} ymm6 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX512F-NEXT: vpand %ymm6, %ymm5, %ymm5
@@ -561,13 +563,13 @@ define <64 x i8> @test_remconstant_64i8(<64 x i8> %a) nounwind {
; AVX512F-NEXT: vpsraw $8, %ymm4, %ymm4
; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [32,32,32,64,128,32,64,16,16,16,16,16,32,32,16,64]
; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4
-; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm3 = ymm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
-; AVX512F-NEXT: vpsraw $8, %ymm3, %ymm3
-; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [64,64,128,64,32,128,64,32,16,64,64,32,128,16,32,16]
-; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3
-; AVX512F-NEXT: vpackuswb %ymm4, %ymm3, %ymm3
+; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm5 = ymm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
+; AVX512F-NEXT: vpsraw $8, %ymm5, %ymm5
+; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm5, %ymm5 # [64,64,128,64,32,128,64,32,16,64,64,32,128,16,32,16]
+; AVX512F-NEXT: vpsrlw $8, %ymm5, %ymm5
+; AVX512F-NEXT: vpackuswb %ymm4, %ymm5, %ymm4
; AVX512F-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm1
-; AVX512F-NEXT: vpsubb %ymm1, %ymm3, %ymm1
+; AVX512F-NEXT: vpsubb %ymm1, %ymm4, %ymm1
; AVX512F-NEXT: vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm3 # [7,0,9,0,11,0,13,0,15,0,17,0,19,0,21,0,23,0,25,0,27,0,29,0,31,0,33,0,35,0,37,0]
; AVX512F-NEXT: vpand %ymm6, %ymm3, %ymm3
; AVX512F-NEXT: vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [0,8,0,10,0,12,0,14,0,16,0,18,0,20,0,22,0,24,0,26,0,28,0,30,0,32,0,34,0,36,0,38]
@@ -593,14 +595,14 @@ define <64 x i8> @test_remconstant_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: vpsraw $8, %zmm2, %zmm2
; AVX512BW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2
; AVX512BW-NEXT: vpsrlw $8, %zmm2, %zmm2
-; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm1 = zmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55]
-; AVX512BW-NEXT: vpsraw $8, %zmm1, %zmm1
-; AVX512BW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
-; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1
-; AVX512BW-NEXT: vpackuswb %zmm2, %zmm1, %zmm1
+; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm3 = zmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55]
+; AVX512BW-NEXT: vpsraw $8, %zmm3, %zmm3
+; AVX512BW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm3, %zmm3
+; AVX512BW-NEXT: vpsrlw $8, %zmm3, %zmm3
+; AVX512BW-NEXT: vpackuswb %zmm2, %zmm3, %zmm2
; AVX512BW-NEXT: vpmovb2m %zmm1, %k0
-; AVX512BW-NEXT: vpmovm2b %k0, %zmm2
-; AVX512BW-NEXT: vpsubb %zmm2, %zmm1, %zmm1
+; AVX512BW-NEXT: vpmovm2b %k0, %zmm1
+; AVX512BW-NEXT: vpsubb %zmm1, %zmm2, %zmm1
; AVX512BW-NEXT: vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm2 # [7,0,9,0,11,0,13,0,15,0,17,0,19,0,21,0,23,0,25,0,27,0,29,0,31,0,33,0,35,0,37,0,38,0,36,0,34,0,32,0,30,0,28,0,26,0,24,0,22,0,20,0,18,0,16,0,14,0,12,0,10,0,8,0]
; AVX512BW-NEXT: vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 # [0,8,0,10,0,12,0,14,0,16,0,18,0,20,0,22,0,24,0,26,0,28,0,30,0,32,0,34,0,36,0,38,0,37,0,35,0,33,0,31,0,29,0,27,0,25,0,23,0,21,0,19,0,17,0,15,0,13,0,11,0,9,0,7]
; AVX512BW-NEXT: vpsllw $8, %zmm1, %zmm1
From 7f148a9b7a9e510a92a9060d9e64b6dfe349b7dd Mon Sep 17 00:00:00 2001
From: Shamshura Egor <shamshuraegor at gmail.com>
Date: Wed, 11 Jun 2025 18:56:13 +0000
Subject: [PATCH 3/3] Fixed vector-idiv-sdiv-512.ll (oops)
---
llvm/test/CodeGen/X86/vector-idiv-sdiv-512.ll | 190 +++++++++---------
1 file changed, 95 insertions(+), 95 deletions(-)
diff --git a/llvm/test/CodeGen/X86/vector-idiv-sdiv-512.ll b/llvm/test/CodeGen/X86/vector-idiv-sdiv-512.ll
index fd32ce31753ee..245255a9a6e96 100644
--- a/llvm/test/CodeGen/X86/vector-idiv-sdiv-512.ll
+++ b/llvm/test/CodeGen/X86/vector-idiv-sdiv-512.ll
@@ -140,28 +140,28 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind {
; AVX512F-NEXT: vpsrlw $8, %ymm5, %ymm5
; AVX512F-NEXT: vpackuswb %ymm3, %ymm5, %ymm3
; AVX512F-NEXT: vpaddb %ymm1, %ymm3, %ymm1
-; AVX512F-NEXT: vpcmpgtb %ymm1, %ymm2, %ymm3
; AVX512F-NEXT: vpsrlw $2, %ymm1, %ymm1
-; AVX512F-NEXT: vpbroadcastb {{.*#+}} ymm5 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
-; AVX512F-NEXT: vpand %ymm5, %ymm1, %ymm1
-; AVX512F-NEXT: vpbroadcastb {{.*#+}} ymm6 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
-; AVX512F-NEXT: vpxor %ymm6, %ymm1, %ymm1
-; AVX512F-NEXT: vpsubb %ymm3, %ymm1, %ymm1
+; AVX512F-NEXT: vpbroadcastb {{.*#+}} ymm3 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
+; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1
+; AVX512F-NEXT: vpbroadcastb {{.*#+}} ymm5 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
+; AVX512F-NEXT: vpxor %ymm5, %ymm1, %ymm1
+; AVX512F-NEXT: vpsubb %ymm5, %ymm1, %ymm1
+; AVX512F-NEXT: vpcmpgtb %ymm1, %ymm2, %ymm6
; AVX512F-NEXT: vpsubb %ymm6, %ymm1, %ymm1
-; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15],ymm2[24],ymm0[24],ymm2[25],ymm0[25],ymm2[26],ymm0[26],ymm2[27],ymm0[27],ymm2[28],ymm0[28],ymm2[29],ymm0[29],ymm2[30],ymm0[30],ymm2[31],ymm0[31]
-; AVX512F-NEXT: vpmulhw %ymm4, %ymm3, %ymm3
-; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3
+; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm6 = ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15],ymm2[24],ymm0[24],ymm2[25],ymm0[25],ymm2[26],ymm0[26],ymm2[27],ymm0[27],ymm2[28],ymm0[28],ymm2[29],ymm0[29],ymm2[30],ymm0[30],ymm2[31],ymm0[31]
+; AVX512F-NEXT: vpmulhw %ymm4, %ymm6, %ymm6
+; AVX512F-NEXT: vpsrlw $8, %ymm6, %ymm6
; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm7 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[16],ymm0[16],ymm2[17],ymm0[17],ymm2[18],ymm0[18],ymm2[19],ymm0[19],ymm2[20],ymm0[20],ymm2[21],ymm0[21],ymm2[22],ymm0[22],ymm2[23],ymm0[23]
; AVX512F-NEXT: vpmulhw %ymm4, %ymm7, %ymm4
; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4
-; AVX512F-NEXT: vpackuswb %ymm3, %ymm4, %ymm3
-; AVX512F-NEXT: vpaddb %ymm0, %ymm3, %ymm0
-; AVX512F-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm2
+; AVX512F-NEXT: vpackuswb %ymm6, %ymm4, %ymm4
+; AVX512F-NEXT: vpaddb %ymm0, %ymm4, %ymm0
; AVX512F-NEXT: vpsrlw $2, %ymm0, %ymm0
-; AVX512F-NEXT: vpand %ymm5, %ymm0, %ymm0
-; AVX512F-NEXT: vpxor %ymm6, %ymm0, %ymm0
+; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm0
+; AVX512F-NEXT: vpxor %ymm5, %ymm0, %ymm0
+; AVX512F-NEXT: vpsubb %ymm5, %ymm0, %ymm0
+; AVX512F-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm2
; AVX512F-NEXT: vpsubb %ymm2, %ymm0, %ymm0
-; AVX512F-NEXT: vpsubb %ymm6, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
@@ -177,13 +177,13 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1
; AVX512BW-NEXT: vpackuswb %zmm2, %zmm1, %zmm1
; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0
-; AVX512BW-NEXT: vpsrlw $2, %zmm0, %zmm1
-; AVX512BW-NEXT: vpbroadcastb {{.*#+}} zmm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
-; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm1 = zmm2 ^ (zmm1 & bst32)
+; AVX512BW-NEXT: vpsrlw $2, %zmm0, %zmm0
+; AVX512BW-NEXT: vpbroadcastb {{.*#+}} zmm1 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
+; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm0 = zmm1 ^ (zmm0 & bst32)
+; AVX512BW-NEXT: vpsubb %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovb2m %zmm0, %k0
-; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
-; AVX512BW-NEXT: vpsubb %zmm0, %zmm1, %zmm0
-; AVX512BW-NEXT: vpsubb %zmm2, %zmm0, %zmm0
+; AVX512BW-NEXT: vpmovm2b %k0, %zmm1
+; AVX512BW-NEXT: vpsubb %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
%res = sdiv <64 x i8> %a, <i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7>
ret <64 x i8> %res
@@ -212,13 +212,13 @@ define <64 x i8> @test_divconstant_64i8(<64 x i8> %a) nounwind {
; AVX512F-NEXT: vpsraw $8, %ymm4, %ymm4
; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [16,32,16,128,32,64,64,16,32,64,128,32,64,128,64,64]
; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4
-; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm5 = ymm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
-; AVX512F-NEXT: vpsraw $8, %ymm5, %ymm5
-; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm5, %ymm5 # [64,16,32,32,16,16,16,16,16,64,32,128,64,32,32,32]
-; AVX512F-NEXT: vpsrlw $8, %ymm5, %ymm5
-; AVX512F-NEXT: vpackuswb %ymm4, %ymm5, %ymm4
-; AVX512F-NEXT: vpcmpgtb %ymm2, %ymm1, %ymm2
-; AVX512F-NEXT: vpsubb %ymm2, %ymm4, %ymm2
+; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm2 = ymm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
+; AVX512F-NEXT: vpsraw $8, %ymm2, %ymm2
+; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [64,16,32,32,16,16,16,16,16,64,32,128,64,32,32,32]
+; AVX512F-NEXT: vpsrlw $8, %ymm2, %ymm2
+; AVX512F-NEXT: vpackuswb %ymm4, %ymm2, %ymm2
+; AVX512F-NEXT: vpcmpgtb %ymm2, %ymm1, %ymm4
+; AVX512F-NEXT: vpsubb %ymm4, %ymm2, %ymm2
; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [35072,33024,30976,14592,6912,26368,12544,47872,34048,33024,32000,30976,15104,14592,28416,6912]
; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4
@@ -231,13 +231,13 @@ define <64 x i8> @test_divconstant_64i8(<64 x i8> %a) nounwind {
; AVX512F-NEXT: vpsraw $8, %ymm3, %ymm3
; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [32,32,32,64,128,32,64,16,16,16,16,16,32,32,16,64]
; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3
-; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm4 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
-; AVX512F-NEXT: vpsraw $8, %ymm4, %ymm4
-; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [64,64,128,64,32,128,64,32,16,64,64,32,128,16,32,16]
-; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4
-; AVX512F-NEXT: vpackuswb %ymm3, %ymm4, %ymm3
-; AVX512F-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
-; AVX512F-NEXT: vpsubb %ymm0, %ymm3, %ymm0
+; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
+; AVX512F-NEXT: vpsraw $8, %ymm0, %ymm0
+; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [64,64,128,64,32,128,64,32,16,64,64,32,128,16,32,16]
+; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0
+; AVX512F-NEXT: vpackuswb %ymm3, %ymm0, %ymm0
+; AVX512F-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm1
+; AVX512F-NEXT: vpsubb %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
@@ -257,14 +257,14 @@ define <64 x i8> @test_divconstant_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: vpsraw $8, %zmm1, %zmm1
; AVX512BW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1
-; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm2 = zmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55]
-; AVX512BW-NEXT: vpsraw $8, %zmm2, %zmm2
-; AVX512BW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2
-; AVX512BW-NEXT: vpsrlw $8, %zmm2, %zmm2
-; AVX512BW-NEXT: vpackuswb %zmm1, %zmm2, %zmm1
+; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm0 = zmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55]
+; AVX512BW-NEXT: vpsraw $8, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
+; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0
+; AVX512BW-NEXT: vpackuswb %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovb2m %zmm0, %k0
-; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
-; AVX512BW-NEXT: vpsubb %zmm0, %zmm1, %zmm0
+; AVX512BW-NEXT: vpmovm2b %k0, %zmm1
+; AVX512BW-NEXT: vpsubb %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
%res = sdiv <64 x i8> %a, <i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 32, i8 33, i8 34, i8 35, i8 36, i8 37, i8 38, i8 38, i8 37, i8 36, i8 35, i8 34, i8 33, i8 32, i8 31, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7>
ret <64 x i8> %res
@@ -441,49 +441,49 @@ define <32 x i16> @test_rem7_32i16(<32 x i16> %a) nounwind {
define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512F-LABEL: test_rem7_64i8:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm2[8],ymm1[8],ymm2[9],ymm1[9],ymm2[10],ymm1[10],ymm2[11],ymm1[11],ymm2[12],ymm1[12],ymm2[13],ymm1[13],ymm2[14],ymm1[14],ymm2[15],ymm1[15],ymm2[24],ymm1[24],ymm2[25],ymm1[25],ymm2[26],ymm1[26],ymm2[27],ymm1[27],ymm2[28],ymm1[28],ymm2[29],ymm1[29],ymm2[30],ymm1[30],ymm2[31],ymm1[31]
+; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2
+; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15],ymm1[24],ymm2[24],ymm1[25],ymm2[25],ymm1[26],ymm2[26],ymm1[27],ymm2[27],ymm1[28],ymm2[28],ymm1[29],ymm2[29],ymm1[30],ymm2[30],ymm1[31],ymm2[31]
; AVX512F-NEXT: vpbroadcastw {{.*#+}} ymm4 = [37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632]
; AVX512F-NEXT: vpmulhw %ymm4, %ymm3, %ymm3
; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3
-; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm5 = ymm2[0],ymm1[0],ymm2[1],ymm1[1],ymm2[2],ymm1[2],ymm2[3],ymm1[3],ymm2[4],ymm1[4],ymm2[5],ymm1[5],ymm2[6],ymm1[6],ymm2[7],ymm1[7],ymm2[16],ymm1[16],ymm2[17],ymm1[17],ymm2[18],ymm1[18],ymm2[19],ymm1[19],ymm2[20],ymm1[20],ymm2[21],ymm1[21],ymm2[22],ymm1[22],ymm2[23],ymm1[23]
+; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm5 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[16],ymm2[16],ymm1[17],ymm2[17],ymm1[18],ymm2[18],ymm1[19],ymm2[19],ymm1[20],ymm2[20],ymm1[21],ymm2[21],ymm1[22],ymm2[22],ymm1[23],ymm2[23]
; AVX512F-NEXT: vpmulhw %ymm4, %ymm5, %ymm5
; AVX512F-NEXT: vpsrlw $8, %ymm5, %ymm5
; AVX512F-NEXT: vpackuswb %ymm3, %ymm5, %ymm3
-; AVX512F-NEXT: vpaddb %ymm1, %ymm3, %ymm3
-; AVX512F-NEXT: vpcmpgtb %ymm3, %ymm2, %ymm5
+; AVX512F-NEXT: vpaddb %ymm2, %ymm3, %ymm3
; AVX512F-NEXT: vpsrlw $2, %ymm3, %ymm3
-; AVX512F-NEXT: vpbroadcastb {{.*#+}} ymm6 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
-; AVX512F-NEXT: vpand %ymm6, %ymm3, %ymm3
-; AVX512F-NEXT: vpbroadcastb {{.*#+}} ymm7 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
-; AVX512F-NEXT: vpxor %ymm7, %ymm3, %ymm3
-; AVX512F-NEXT: vpsubb %ymm5, %ymm3, %ymm3
+; AVX512F-NEXT: vpbroadcastb {{.*#+}} ymm5 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
+; AVX512F-NEXT: vpand %ymm5, %ymm3, %ymm3
+; AVX512F-NEXT: vpbroadcastb {{.*#+}} ymm6 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
+; AVX512F-NEXT: vpxor %ymm6, %ymm3, %ymm3
+; AVX512F-NEXT: vpsubb %ymm6, %ymm3, %ymm3
+; AVX512F-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm7
; AVX512F-NEXT: vpsubb %ymm7, %ymm3, %ymm3
-; AVX512F-NEXT: vpsllw $3, %ymm3, %ymm5
+; AVX512F-NEXT: vpsllw $3, %ymm3, %ymm7
; AVX512F-NEXT: vpbroadcastb {{.*#+}} ymm8 = [248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248]
-; AVX512F-NEXT: vpand %ymm5, %ymm8, %ymm5
-; AVX512F-NEXT: vpsubb %ymm5, %ymm3, %ymm3
-; AVX512F-NEXT: vpaddb %ymm3, %ymm1, %ymm1
-; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15],ymm2[24],ymm0[24],ymm2[25],ymm0[25],ymm2[26],ymm0[26],ymm2[27],ymm0[27],ymm2[28],ymm0[28],ymm2[29],ymm0[29],ymm2[30],ymm0[30],ymm2[31],ymm0[31]
+; AVX512F-NEXT: vpand %ymm7, %ymm8, %ymm7
+; AVX512F-NEXT: vpsubb %ymm7, %ymm3, %ymm3
+; AVX512F-NEXT: vpaddb %ymm3, %ymm2, %ymm2
+; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
; AVX512F-NEXT: vpmulhw %ymm4, %ymm3, %ymm3
; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3
-; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm5 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[16],ymm0[16],ymm2[17],ymm0[17],ymm2[18],ymm0[18],ymm2[19],ymm0[19],ymm2[20],ymm0[20],ymm2[21],ymm0[21],ymm2[22],ymm0[22],ymm2[23],ymm0[23]
-; AVX512F-NEXT: vpmulhw %ymm4, %ymm5, %ymm4
+; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm7 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
+; AVX512F-NEXT: vpmulhw %ymm4, %ymm7, %ymm4
; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4
; AVX512F-NEXT: vpackuswb %ymm3, %ymm4, %ymm3
; AVX512F-NEXT: vpaddb %ymm0, %ymm3, %ymm3
-; AVX512F-NEXT: vpcmpgtb %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vpsrlw $2, %ymm3, %ymm3
-; AVX512F-NEXT: vpand %ymm6, %ymm3, %ymm3
-; AVX512F-NEXT: vpxor %ymm7, %ymm3, %ymm3
-; AVX512F-NEXT: vpsubb %ymm2, %ymm3, %ymm2
-; AVX512F-NEXT: vpsubb %ymm7, %ymm2, %ymm2
-; AVX512F-NEXT: vpsllw $3, %ymm2, %ymm3
+; AVX512F-NEXT: vpand %ymm5, %ymm3, %ymm3
+; AVX512F-NEXT: vpxor %ymm6, %ymm3, %ymm3
+; AVX512F-NEXT: vpsubb %ymm6, %ymm3, %ymm3
+; AVX512F-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm1
+; AVX512F-NEXT: vpsubb %ymm1, %ymm3, %ymm1
+; AVX512F-NEXT: vpsllw $3, %ymm1, %ymm3
; AVX512F-NEXT: vpand %ymm3, %ymm8, %ymm3
-; AVX512F-NEXT: vpsubb %ymm3, %ymm2, %ymm2
-; AVX512F-NEXT: vpaddb %ymm2, %ymm0, %ymm0
-; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512F-NEXT: vpsubb %ymm3, %ymm1, %ymm1
+; AVX512F-NEXT: vpaddb %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: test_rem7_64i8:
@@ -498,13 +498,13 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1
; AVX512BW-NEXT: vpackuswb %zmm2, %zmm1, %zmm1
; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm1
-; AVX512BW-NEXT: vpsrlw $2, %zmm1, %zmm2
-; AVX512BW-NEXT: vpbroadcastb {{.*#+}} zmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
-; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm2 = zmm3 ^ (zmm2 & bst32)
+; AVX512BW-NEXT: vpsrlw $2, %zmm1, %zmm1
+; AVX512BW-NEXT: vpbroadcastb {{.*#+}} zmm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
+; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm1 = zmm2 ^ (zmm1 & bst32)
+; AVX512BW-NEXT: vpsubb %zmm2, %zmm1, %zmm1
; AVX512BW-NEXT: vpmovb2m %zmm1, %k0
-; AVX512BW-NEXT: vpmovm2b %k0, %zmm1
-; AVX512BW-NEXT: vpsubb %zmm1, %zmm2, %zmm1
-; AVX512BW-NEXT: vpsubb %zmm3, %zmm1, %zmm1
+; AVX512BW-NEXT: vpmovm2b %k0, %zmm2
+; AVX512BW-NEXT: vpsubb %zmm2, %zmm1, %zmm1
; AVX512BW-NEXT: vpsllw $3, %zmm1, %zmm2
; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm2, %zmm2
; AVX512BW-NEXT: vpsubb %zmm2, %zmm1, %zmm1
@@ -537,13 +537,13 @@ define <64 x i8> @test_remconstant_64i8(<64 x i8> %a) nounwind {
; AVX512F-NEXT: vpsraw $8, %ymm5, %ymm5
; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm5, %ymm5 # [16,32,16,128,32,64,64,16,32,64,128,32,64,128,64,64]
; AVX512F-NEXT: vpsrlw $8, %ymm5, %ymm5
-; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm6 = ymm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
-; AVX512F-NEXT: vpsraw $8, %ymm6, %ymm6
-; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm6, %ymm6 # [64,16,32,32,16,16,16,16,16,64,32,128,64,32,32,32]
-; AVX512F-NEXT: vpsrlw $8, %ymm6, %ymm6
-; AVX512F-NEXT: vpackuswb %ymm5, %ymm6, %ymm5
-; AVX512F-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm3
-; AVX512F-NEXT: vpsubb %ymm3, %ymm5, %ymm3
+; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm3 = ymm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
+; AVX512F-NEXT: vpsraw $8, %ymm3, %ymm3
+; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [64,16,32,32,16,16,16,16,16,64,32,128,64,32,32,32]
+; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3
+; AVX512F-NEXT: vpackuswb %ymm5, %ymm3, %ymm3
+; AVX512F-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm5
+; AVX512F-NEXT: vpsubb %ymm5, %ymm3, %ymm3
; AVX512F-NEXT: vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm5 # [38,0,36,0,34,0,32,0,30,0,28,0,26,0,24,0,22,0,20,0,18,0,16,0,14,0,12,0,10,0,8,0]
; AVX512F-NEXT: vpbroadcastw {{.*#+}} ymm6 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX512F-NEXT: vpand %ymm6, %ymm5, %ymm5
@@ -563,13 +563,13 @@ define <64 x i8> @test_remconstant_64i8(<64 x i8> %a) nounwind {
; AVX512F-NEXT: vpsraw $8, %ymm4, %ymm4
; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [32,32,32,64,128,32,64,16,16,16,16,16,32,32,16,64]
; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4
-; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm5 = ymm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
-; AVX512F-NEXT: vpsraw $8, %ymm5, %ymm5
-; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm5, %ymm5 # [64,64,128,64,32,128,64,32,16,64,64,32,128,16,32,16]
-; AVX512F-NEXT: vpsrlw $8, %ymm5, %ymm5
-; AVX512F-NEXT: vpackuswb %ymm4, %ymm5, %ymm4
+; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm3 = ymm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
+; AVX512F-NEXT: vpsraw $8, %ymm3, %ymm3
+; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [64,64,128,64,32,128,64,32,16,64,64,32,128,16,32,16]
+; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3
+; AVX512F-NEXT: vpackuswb %ymm4, %ymm3, %ymm3
; AVX512F-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm1
-; AVX512F-NEXT: vpsubb %ymm1, %ymm4, %ymm1
+; AVX512F-NEXT: vpsubb %ymm1, %ymm3, %ymm1
; AVX512F-NEXT: vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm3 # [7,0,9,0,11,0,13,0,15,0,17,0,19,0,21,0,23,0,25,0,27,0,29,0,31,0,33,0,35,0,37,0]
; AVX512F-NEXT: vpand %ymm6, %ymm3, %ymm3
; AVX512F-NEXT: vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [0,8,0,10,0,12,0,14,0,16,0,18,0,20,0,22,0,24,0,26,0,28,0,30,0,32,0,34,0,36,0,38]
@@ -595,14 +595,14 @@ define <64 x i8> @test_remconstant_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: vpsraw $8, %zmm2, %zmm2
; AVX512BW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2
; AVX512BW-NEXT: vpsrlw $8, %zmm2, %zmm2
-; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm3 = zmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55]
-; AVX512BW-NEXT: vpsraw $8, %zmm3, %zmm3
-; AVX512BW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm3, %zmm3
-; AVX512BW-NEXT: vpsrlw $8, %zmm3, %zmm3
-; AVX512BW-NEXT: vpackuswb %zmm2, %zmm3, %zmm2
+; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm1 = zmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55]
+; AVX512BW-NEXT: vpsraw $8, %zmm1, %zmm1
+; AVX512BW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
+; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1
+; AVX512BW-NEXT: vpackuswb %zmm2, %zmm1, %zmm1
; AVX512BW-NEXT: vpmovb2m %zmm1, %k0
-; AVX512BW-NEXT: vpmovm2b %k0, %zmm1
-; AVX512BW-NEXT: vpsubb %zmm1, %zmm2, %zmm1
+; AVX512BW-NEXT: vpmovm2b %k0, %zmm2
+; AVX512BW-NEXT: vpsubb %zmm2, %zmm1, %zmm1
; AVX512BW-NEXT: vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm2 # [7,0,9,0,11,0,13,0,15,0,17,0,19,0,21,0,23,0,25,0,27,0,29,0,31,0,33,0,35,0,37,0,38,0,36,0,34,0,32,0,30,0,28,0,26,0,24,0,22,0,20,0,18,0,16,0,14,0,12,0,10,0,8,0]
; AVX512BW-NEXT: vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 # [0,8,0,10,0,12,0,14,0,16,0,18,0,20,0,22,0,24,0,26,0,28,0,30,0,32,0,34,0,36,0,38,0,37,0,35,0,33,0,31,0,29,0,27,0,25,0,23,0,21,0,19,0,17,0,15,0,13,0,11,0,9,0,7]
; AVX512BW-NEXT: vpsllw $8, %zmm1, %zmm1