[llvm] use "mem" just for full width loads and "bst32" / "bst64" for broadcast loads (PR #143721)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 11 08:07:27 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-x86
Author: Shamshura Egor (egorshamshura)
<details>
<summary>Changes</summary>
---
Patch is 141.14 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/143721.diff
51 Files Affected:
- (modified) llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp (+24-3)
- (modified) llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll (+8-8)
- (modified) llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll (+8-8)
- (modified) llvm/test/CodeGen/X86/avgfloors.ll (+6-6)
- (modified) llvm/test/CodeGen/X86/avx512-cvt.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/avx512-logic.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/avx512fp16-arith.ll (+3-3)
- (modified) llvm/test/CodeGen/X86/avx512vl-logic.ll (+4-4)
- (modified) llvm/test/CodeGen/X86/combine-bitselect.ll (+3-3)
- (modified) llvm/test/CodeGen/X86/combine-or-shuffle.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/fp-round.ll (+17-17)
- (modified) llvm/test/CodeGen/X86/gfni-funnel-shifts.ll (+6-6)
- (modified) llvm/test/CodeGen/X86/gfni-shifts.ll (+3-3)
- (modified) llvm/test/CodeGen/X86/midpoint-int-vec-128.ll (+5-5)
- (modified) llvm/test/CodeGen/X86/midpoint-int-vec-256.ll (+5-5)
- (modified) llvm/test/CodeGen/X86/min-legal-vector-width.ll (+6-6)
- (modified) llvm/test/CodeGen/X86/pmul.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/psubus.ll (+3-3)
- (modified) llvm/test/CodeGen/X86/sadd_sat_vec.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/srem-seteq-vec-nonsplat.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/ssub_sat_vec.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/usub_sat_vec.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/vector-fshl-128.ll (+6-6)
- (modified) llvm/test/CodeGen/X86/vector-fshl-256.ll (+6-6)
- (modified) llvm/test/CodeGen/X86/vector-fshl-512.ll (+6-6)
- (modified) llvm/test/CodeGen/X86/vector-fshl-rot-128.ll (+6-6)
- (modified) llvm/test/CodeGen/X86/vector-fshl-rot-256.ll (+11-11)
- (modified) llvm/test/CodeGen/X86/vector-fshl-rot-512.ll (+6-6)
- (modified) llvm/test/CodeGen/X86/vector-fshr-128.ll (+6-6)
- (modified) llvm/test/CodeGen/X86/vector-fshr-256.ll (+6-6)
- (modified) llvm/test/CodeGen/X86/vector-fshr-512.ll (+6-6)
- (modified) llvm/test/CodeGen/X86/vector-fshr-rot-128.ll (+6-6)
- (modified) llvm/test/CodeGen/X86/vector-fshr-rot-256.ll (+12-12)
- (modified) llvm/test/CodeGen/X86/vector-fshr-rot-512.ll (+6-6)
- (modified) llvm/test/CodeGen/X86/vector-idiv-sdiv-512.ll (+3-3)
- (modified) llvm/test/CodeGen/X86/vector-idiv-udiv-512.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-5.ll (+4-4)
- (modified) llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-7.ll (+8-8)
- (modified) llvm/test/CodeGen/X86/vector-rotate-128.ll (+6-6)
- (modified) llvm/test/CodeGen/X86/vector-rotate-256.ll (+11-11)
- (modified) llvm/test/CodeGen/X86/vector-rotate-512.ll (+16-16)
- (modified) llvm/test/CodeGen/X86/vector-shift-ashr-128.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/vector-shift-ashr-256.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/vector-shift-ashr-512.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/vector-shift-ashr-sub128.ll (+6-6)
- (modified) llvm/test/CodeGen/X86/vector-shift-shl-256.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/vector-shift-shl-512.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/vector-shuffle-avx512.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/vselect-pcmp.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll (+4-4)
- (modified) llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll (+4-4)
``````````diff
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp
index 4c26fc86f9547..98a05e4c2c0e7 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp
@@ -931,10 +931,18 @@ static bool printPTERNLOGComments(const MCInst *MI, raw_ostream &OS,
// dest, src1, mask, src2, memory, tbl
CASE_PTERNLOG(PTERNLOGD, m)
CASE_PTERNLOG(PTERNLOGQ, m)
+ Src2Idx = NumOperands - 7;
+ Src3Idx = -1;
+ break;
+
CASE_PTERNLOG(PTERNLOGD, mb)
+ Src2Idx = NumOperands - 7;
+ Src3Idx = -2;
+ break;
+
CASE_PTERNLOG(PTERNLOGQ, mb)
Src2Idx = NumOperands - 7;
- Src3Idx = -1;
+ Src3Idx = -3;
break;
default:
@@ -943,8 +951,21 @@ static bool printPTERNLOGComments(const MCInst *MI, raw_ostream &OS,
StringRef DestName = getRegName(MI->getOperand(0).getReg());
StringRef Src1Name = getRegName(MI->getOperand(1).getReg());
StringRef Src2Name = getRegName(MI->getOperand(Src2Idx).getReg());
- StringRef Src3Name =
- Src3Idx != -1 ? getRegName(MI->getOperand(Src3Idx).getReg()) : "mem";
+ StringRef Src3Name;
+ switch (Src3Idx) {
+ case -1:
+ Src3Name = "mem";
+ break;
+ case -2:
+ Src3Name = "bst32";
+ break;
+ case -3:
+ Src3Name = "bst64";
+ break;
+ default:
+ Src3Name = getRegName(MI->getOperand(Src3Idx).getReg());
+ break;
+ }
uint8_t TruthTable = MI->getOperand(NumOperands - 1).getImm();
StringRef SrcNames[] = {Src1Name, Src2Name, Src3Name};
diff --git a/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll b/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll
index 7d2915ddc75b1..749a42918b9fb 100644
--- a/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll
+++ b/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll
@@ -1235,7 +1235,7 @@ define void @vec256_i8_widen_to_i32_factor4_broadcast_to_v8i32_factor8(ptr %in.v
; AVX512F-NEXT: vmovdqa (%rdi), %xmm1
; AVX512F-NEXT: vpaddb (%rsi), %xmm1, %xmm1
; AVX512F-NEXT: vpbroadcastb %xmm1, %ymm1
-; AVX512F-NEXT: vpternlogd {{.*#+}} ymm1 = ymm1 ^ (mem & (ymm1 ^ ymm0))
+; AVX512F-NEXT: vpternlogd {{.*#+}} ymm1 = ymm1 ^ (bst32 & (ymm1 ^ ymm0))
; AVX512F-NEXT: vpaddb (%rdx), %ymm1, %ymm0
; AVX512F-NEXT: vmovdqa %ymm0, (%rcx)
; AVX512F-NEXT: vzeroupper
@@ -1248,7 +1248,7 @@ define void @vec256_i8_widen_to_i32_factor4_broadcast_to_v8i32_factor8(ptr %in.v
; AVX512DQ-NEXT: vmovdqa (%rdi), %xmm1
; AVX512DQ-NEXT: vpaddb (%rsi), %xmm1, %xmm1
; AVX512DQ-NEXT: vpbroadcastb %xmm1, %ymm1
-; AVX512DQ-NEXT: vpternlogd {{.*#+}} ymm1 = ymm1 ^ (mem & (ymm1 ^ ymm0))
+; AVX512DQ-NEXT: vpternlogd {{.*#+}} ymm1 = ymm1 ^ (bst32 & (ymm1 ^ ymm0))
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm1, %ymm0
; AVX512DQ-NEXT: vmovdqa %ymm0, (%rcx)
; AVX512DQ-NEXT: vzeroupper
@@ -1359,7 +1359,7 @@ define void @vec256_i8_widen_to_i64_factor8_broadcast_to_v4i64_factor4(ptr %in.v
; AVX512F-NEXT: vmovdqa (%rdi), %xmm1
; AVX512F-NEXT: vpaddb (%rsi), %xmm1, %xmm1
; AVX512F-NEXT: vpbroadcastb %xmm1, %ymm1
-; AVX512F-NEXT: vpternlogq {{.*#+}} ymm1 = ymm1 ^ (mem & (ymm1 ^ ymm0))
+; AVX512F-NEXT: vpternlogq {{.*#+}} ymm1 = ymm1 ^ (bst64 & (ymm1 ^ ymm0))
; AVX512F-NEXT: vpaddb (%rdx), %ymm1, %ymm0
; AVX512F-NEXT: vmovdqa %ymm0, (%rcx)
; AVX512F-NEXT: vzeroupper
@@ -1372,7 +1372,7 @@ define void @vec256_i8_widen_to_i64_factor8_broadcast_to_v4i64_factor4(ptr %in.v
; AVX512DQ-NEXT: vmovdqa (%rdi), %xmm1
; AVX512DQ-NEXT: vpaddb (%rsi), %xmm1, %xmm1
; AVX512DQ-NEXT: vpbroadcastb %xmm1, %ymm1
-; AVX512DQ-NEXT: vpternlogq {{.*#+}} ymm1 = ymm1 ^ (mem & (ymm1 ^ ymm0))
+; AVX512DQ-NEXT: vpternlogq {{.*#+}} ymm1 = ymm1 ^ (bst64 & (ymm1 ^ ymm0))
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm1, %ymm0
; AVX512DQ-NEXT: vmovdqa %ymm0, (%rcx)
; AVX512DQ-NEXT: vzeroupper
@@ -2702,7 +2702,7 @@ define void @vec384_i8_widen_to_i32_factor4_broadcast_to_v12i32_factor12(ptr %in
; AVX512F-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
; AVX512F-NEXT: vpaddb (%rsi), %xmm0, %xmm0
; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0
-; AVX512F-NEXT: vpternlogd {{.*#+}} ymm1 = ymm0 ^ (mem & (ymm1 ^ ymm0))
+; AVX512F-NEXT: vpternlogd {{.*#+}} ymm1 = ymm0 ^ (bst32 & (ymm1 ^ ymm0))
; AVX512F-NEXT: vpaddb (%rdx), %ymm1, %ymm1
; AVX512F-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
; AVX512F-NEXT: vmovdqa %ymm0, 32(%rcx)
@@ -2717,7 +2717,7 @@ define void @vec384_i8_widen_to_i32_factor4_broadcast_to_v12i32_factor12(ptr %in
; AVX512DQ-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
; AVX512DQ-NEXT: vpaddb (%rsi), %xmm0, %xmm0
; AVX512DQ-NEXT: vpbroadcastb %xmm0, %ymm0
-; AVX512DQ-NEXT: vpternlogd {{.*#+}} ymm1 = ymm0 ^ (mem & (ymm1 ^ ymm0))
+; AVX512DQ-NEXT: vpternlogd {{.*#+}} ymm1 = ymm0 ^ (bst32 & (ymm1 ^ ymm0))
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm1, %ymm1
; AVX512DQ-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rcx)
@@ -2964,7 +2964,7 @@ define void @vec384_i8_widen_to_i64_factor8_broadcast_to_v6i64_factor6(ptr %in.v
; AVX512F-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
; AVX512F-NEXT: vpaddb (%rsi), %xmm0, %xmm0
; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0
-; AVX512F-NEXT: vpternlogq {{.*#+}} ymm1 = ymm0 ^ (mem & (ymm1 ^ ymm0))
+; AVX512F-NEXT: vpternlogq {{.*#+}} ymm1 = ymm0 ^ (bst64 & (ymm1 ^ ymm0))
; AVX512F-NEXT: vpaddb (%rdx), %ymm1, %ymm1
; AVX512F-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
; AVX512F-NEXT: vmovdqa %ymm0, 32(%rcx)
@@ -2979,7 +2979,7 @@ define void @vec384_i8_widen_to_i64_factor8_broadcast_to_v6i64_factor6(ptr %in.v
; AVX512DQ-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
; AVX512DQ-NEXT: vpaddb (%rsi), %xmm0, %xmm0
; AVX512DQ-NEXT: vpbroadcastb %xmm0, %ymm0
-; AVX512DQ-NEXT: vpternlogq {{.*#+}} ymm1 = ymm0 ^ (mem & (ymm1 ^ ymm0))
+; AVX512DQ-NEXT: vpternlogq {{.*#+}} ymm1 = ymm0 ^ (bst64 & (ymm1 ^ ymm0))
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm1, %ymm1
; AVX512DQ-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rcx)
diff --git a/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll b/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll
index f5802150d5353..2f780c31e8290 100644
--- a/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll
+++ b/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll
@@ -1020,7 +1020,7 @@ define void @vec256_i8_widen_to_i32_factor4_broadcast_to_v8i32_factor8(ptr %in.e
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa 32(%rdi), %ymm0
; AVX512F-NEXT: vpbroadcastb (%rdi), %ymm1
-; AVX512F-NEXT: vpternlogd {{.*#+}} ymm1 = ymm1 ^ (mem & (ymm1 ^ ymm0))
+; AVX512F-NEXT: vpternlogd {{.*#+}} ymm1 = ymm1 ^ (bst32 & (ymm1 ^ ymm0))
; AVX512F-NEXT: vpaddb (%rsi), %ymm1, %ymm0
; AVX512F-NEXT: vmovdqa %ymm0, (%rdx)
; AVX512F-NEXT: vzeroupper
@@ -1030,7 +1030,7 @@ define void @vec256_i8_widen_to_i32_factor4_broadcast_to_v8i32_factor8(ptr %in.e
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vmovdqa 32(%rdi), %ymm0
; AVX512DQ-NEXT: vpbroadcastb (%rdi), %ymm1
-; AVX512DQ-NEXT: vpternlogd {{.*#+}} ymm1 = ymm1 ^ (mem & (ymm1 ^ ymm0))
+; AVX512DQ-NEXT: vpternlogd {{.*#+}} ymm1 = ymm1 ^ (bst32 & (ymm1 ^ ymm0))
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm1, %ymm0
; AVX512DQ-NEXT: vmovdqa %ymm0, (%rdx)
; AVX512DQ-NEXT: vzeroupper
@@ -1116,7 +1116,7 @@ define void @vec256_i8_widen_to_i64_factor8_broadcast_to_v4i64_factor4(ptr %in.e
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa 32(%rdi), %ymm0
; AVX512F-NEXT: vpbroadcastb (%rdi), %ymm1
-; AVX512F-NEXT: vpternlogq {{.*#+}} ymm1 = ymm1 ^ (mem & (ymm1 ^ ymm0))
+; AVX512F-NEXT: vpternlogq {{.*#+}} ymm1 = ymm1 ^ (bst64 & (ymm1 ^ ymm0))
; AVX512F-NEXT: vpaddb (%rsi), %ymm1, %ymm0
; AVX512F-NEXT: vmovdqa %ymm0, (%rdx)
; AVX512F-NEXT: vzeroupper
@@ -1126,7 +1126,7 @@ define void @vec256_i8_widen_to_i64_factor8_broadcast_to_v4i64_factor4(ptr %in.e
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vmovdqa 32(%rdi), %ymm0
; AVX512DQ-NEXT: vpbroadcastb (%rdi), %ymm1
-; AVX512DQ-NEXT: vpternlogq {{.*#+}} ymm1 = ymm1 ^ (mem & (ymm1 ^ ymm0))
+; AVX512DQ-NEXT: vpternlogq {{.*#+}} ymm1 = ymm1 ^ (bst64 & (ymm1 ^ ymm0))
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm1, %ymm0
; AVX512DQ-NEXT: vmovdqa %ymm0, (%rdx)
; AVX512DQ-NEXT: vzeroupper
@@ -2125,7 +2125,7 @@ define void @vec384_i8_widen_to_i32_factor4_broadcast_to_v12i32_factor12(ptr %in
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa 48(%rdi), %xmm0
; AVX512F-NEXT: vpbroadcastb (%rdi), %ymm1
-; AVX512F-NEXT: vpternlogd {{.*#+}} ymm0 = ymm1 ^ (mem & (ymm0 ^ ymm1))
+; AVX512F-NEXT: vpternlogd {{.*#+}} ymm0 = ymm1 ^ (bst32 & (ymm0 ^ ymm1))
; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX512F-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
; AVX512F-NEXT: vmovdqa %ymm1, 32(%rdx)
@@ -2137,7 +2137,7 @@ define void @vec384_i8_widen_to_i32_factor4_broadcast_to_v12i32_factor12(ptr %in
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vmovdqa 48(%rdi), %xmm0
; AVX512DQ-NEXT: vpbroadcastb (%rdi), %ymm1
-; AVX512DQ-NEXT: vpternlogd {{.*#+}} ymm0 = ymm1 ^ (mem & (ymm0 ^ ymm1))
+; AVX512DQ-NEXT: vpternlogd {{.*#+}} ymm0 = ymm1 ^ (bst32 & (ymm0 ^ ymm1))
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
; AVX512DQ-NEXT: vmovdqa %ymm1, 32(%rdx)
@@ -2346,7 +2346,7 @@ define void @vec384_i8_widen_to_i64_factor8_broadcast_to_v6i64_factor6(ptr %in.e
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa 48(%rdi), %xmm0
; AVX512F-NEXT: vpbroadcastb (%rdi), %ymm1
-; AVX512F-NEXT: vpternlogq {{.*#+}} ymm0 = ymm1 ^ (mem & (ymm0 ^ ymm1))
+; AVX512F-NEXT: vpternlogq {{.*#+}} ymm0 = ymm1 ^ (bst64 & (ymm0 ^ ymm1))
; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX512F-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
; AVX512F-NEXT: vmovdqa %ymm1, 32(%rdx)
@@ -2358,7 +2358,7 @@ define void @vec384_i8_widen_to_i64_factor8_broadcast_to_v6i64_factor6(ptr %in.e
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vmovdqa 48(%rdi), %xmm0
; AVX512DQ-NEXT: vpbroadcastb (%rdi), %ymm1
-; AVX512DQ-NEXT: vpternlogq {{.*#+}} ymm0 = ymm1 ^ (mem & (ymm0 ^ ymm1))
+; AVX512DQ-NEXT: vpternlogq {{.*#+}} ymm0 = ymm1 ^ (bst64 & (ymm0 ^ ymm1))
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
; AVX512DQ-NEXT: vmovdqa %ymm1, 32(%rdx)
diff --git a/llvm/test/CodeGen/X86/avgfloors.ll b/llvm/test/CodeGen/X86/avgfloors.ll
index 0508e5ccb5430..d6edc5d6358a1 100644
--- a/llvm/test/CodeGen/X86/avgfloors.ll
+++ b/llvm/test/CodeGen/X86/avgfloors.ll
@@ -53,7 +53,7 @@ define <16 x i8> @test_fixed_v16i8(<16 x i8> %a0, <16 x i8> %a1) nounwind {
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $1, %xmm0, %xmm0
; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm1 = [64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64]
-; AVX512-NEXT: vpternlogd {{.*#+}} xmm0 = xmm1 ^ (xmm0 & mem)
+; AVX512-NEXT: vpternlogd {{.*#+}} xmm0 = xmm1 ^ (xmm0 & bst32)
; AVX512-NEXT: vpaddb %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vpsubb %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
@@ -108,7 +108,7 @@ define <16 x i8> @test_ext_v16i8(<16 x i8> %a0, <16 x i8> %a1) nounwind {
; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpsrlw $1, %xmm0, %xmm0
; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm1 = [64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64]
-; AVX512-NEXT: vpternlogd {{.*#+}} xmm0 = xmm1 ^ (xmm0 & mem)
+; AVX512-NEXT: vpternlogd {{.*#+}} xmm0 = xmm1 ^ (xmm0 & bst32)
; AVX512-NEXT: vpaddb %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vpsubb %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
@@ -405,7 +405,7 @@ define <32 x i8> @test_fixed_v32i8(<32 x i8> %a0, <32 x i8> %a1) nounwind {
; AVX512-NEXT: vpxor %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpsrlw $1, %ymm0, %ymm0
; AVX512-NEXT: vpbroadcastd {{.*#+}} ymm1 = [64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64]
-; AVX512-NEXT: vpternlogd {{.*#+}} ymm0 = ymm1 ^ (ymm0 & mem)
+; AVX512-NEXT: vpternlogd {{.*#+}} ymm0 = ymm1 ^ (ymm0 & bst32)
; AVX512-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; AVX512-NEXT: vpsubb %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
@@ -478,7 +478,7 @@ define <32 x i8> @test_ext_v32i8(<32 x i8> %a0, <32 x i8> %a1) nounwind {
; AVX512-NEXT: vpxor %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpsrlw $1, %ymm0, %ymm0
; AVX512-NEXT: vpbroadcastd {{.*#+}} ymm1 = [64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64]
-; AVX512-NEXT: vpternlogd {{.*#+}} ymm0 = ymm1 ^ (ymm0 & mem)
+; AVX512-NEXT: vpternlogd {{.*#+}} ymm0 = ymm1 ^ (ymm0 & bst32)
; AVX512-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; AVX512-NEXT: vpsubb %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
@@ -966,7 +966,7 @@ define <64 x i8> @test_fixed_v64i8(<64 x i8> %a0, <64 x i8> %a1) nounwind {
; AVX512-NEXT: vpxorq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpsrlw $1, %zmm0, %zmm0
; AVX512-NEXT: vpbroadcastd {{.*#+}} zmm1 = [64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64]
-; AVX512-NEXT: vpternlogd {{.*#+}} zmm0 = zmm1 ^ (zmm0 & mem)
+; AVX512-NEXT: vpternlogd {{.*#+}} zmm0 = zmm1 ^ (zmm0 & bst32)
; AVX512-NEXT: vpaddb %zmm2, %zmm0, %zmm0
; AVX512-NEXT: vpsubb %zmm1, %zmm0, %zmm0
; AVX512-NEXT: retq
@@ -1078,7 +1078,7 @@ define <64 x i8> @test_ext_v64i8(<64 x i8> %a0, <64 x i8> %a1) nounwind {
; AVX512-NEXT: vpxorq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpsrlw $1, %zmm0, %zmm0
; AVX512-NEXT: vpbroadcastd {{.*#+}} zmm1 = [64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64]
-; AVX512-NEXT: vpternlogd {{.*#+}} zmm0 = zmm1 ^ (zmm0 & mem)
+; AVX512-NEXT: vpternlogd {{.*#+}} zmm0 = zmm1 ^ (zmm0 & bst32)
; AVX512-NEXT: vpaddb %zmm2, %zmm0, %zmm0
; AVX512-NEXT: vpsubb %zmm1, %zmm0, %zmm0
; AVX512-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/avx512-cvt.ll b/llvm/test/CodeGen/X86/avx512-cvt.ll
index a78d97782e6a3..ad68e1ce36949 100644
--- a/llvm/test/CodeGen/X86/avx512-cvt.ll
+++ b/llvm/test/CodeGen/X86/avx512-cvt.ll
@@ -351,7 +351,7 @@ define <8 x double> @ulto8f64(<8 x i64> %a) {
; NODQ-LABEL: ulto8f64:
; NODQ: # %bb.0:
; NODQ-NEXT: vpbroadcastq {{.*#+}} zmm1 = [4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200]
-; NODQ-NEXT: vpternlogq {{.*#+}} zmm1 = zmm1 | (zmm0 & mem)
+; NODQ-NEXT: vpternlogq {{.*#+}} zmm1 = zmm1 | (zmm0 & bst64)
; NODQ-NEXT: vpsrlq $32, %zmm0, %zmm0
; NODQ-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
; NODQ-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
diff --git a/llvm/test/CodeGen/X86/avx512-logic.ll b/llvm/test/CodeGen/X86/avx512-logic.ll
index 23f4fcb1c77c6..ac98e9bd010e7 100644
--- a/llvm/test/CodeGen/X86/avx512-logic.ll
+++ b/llvm/test/CodeGen/X86/avx512-logic.ll
@@ -889,7 +889,7 @@ define <16 x i32> @ternlog_xor_andn(<16 x i32> %x, <16 x i32> %y, <16 x i32> %z)
define <16 x i32> @ternlog_or_and_mask(<16 x i32> %x, <16 x i32> %y) {
; ALL-LABEL: ternlog_or_and_mask:
; ALL: ## %bb.0:
-; ALL-NEXT: vpternlogd {{.*#+}} zmm0 = (zmm0 & mem) | zmm1
+; ALL-NEXT: vpternlogd {{.*#+}} zmm0 = (zmm0 & bst32) | zmm1
; ALL-NEXT: retq
%a = and <16 x i32> %x, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
%b = or <16 x i32> %a, %y
@@ -899,7 +899,7 @@ define <16 x i32> @ternlog_or_and_mask(<16 x i32> %x, <16 x i32> %y) {
define <8 x i64> @ternlog_xor_and_mask(<8 x i64> %x, <8 x i64> %y) {
; ALL-LABEL: ternlog_xor_and_mask:
; ALL: ## %bb.0:
-; ALL-NEXT: vpternlogq {{.*#+}} zmm0 = zmm1 ^ (zmm0 & mem)
+; ALL-NEXT: vpternlogq {{.*#+}} zmm0 = zmm1 ^ (zmm0 & bst64)
; ALL-NEXT: retq
%a = and <8 x i64> %x, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
%b = xor <8 x i64> %a, %y
diff --git a/llvm/test/CodeGen/X86/avx512fp16-arith.ll b/llvm/test/CodeGen/X86/avx512fp16-arith.ll
index b264f5fc34688..cdf6526465fd5 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-arith.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-arith.ll
@@ -384,7 +384,7 @@ declare <8 x half> @llvm.fabs.v8f16(<8 x half>)
define <8 x half> @fcopysignv8f16(<8 x half> %x, <8 x half> %y) {
; CHECK-LABEL: fcopysignv8f16:
; CHECK: ## %bb.0:
-; CHECK-NEXT: vpternlogd {{.*#+}} xmm0 = xmm1 ^ (mem & (xmm0 ^ xmm1))
+; CHECK-NEXT: vpternlogd {{.*#+}} xmm0 = xmm1 ^ (bst32 & (xmm0 ^ xmm1))
; CHECK-NEXT: retq
%a = call <8 x half> @llvm.copysign.v8f16(<8 x half> %x, <8 x half> %y)
ret <8 x half> %a
@@ -439,7 +439,7 @@ declare <16 x half> @llvm.fabs.v16f16(<16 x half>)
define <16 x half> @fcopysignv16f16(<16 x half> %x, <16 x half> %y) {
; CHECK-LABEL: fcopysignv16f16:
; CHECK: ## %bb.0:
-; CHECK-NEXT: vpternlogd {{.*#+}} ymm0 = ymm1 ^ (mem & (ymm0 ^ ymm1))
+; CHECK-NEXT: vpternlogd {{.*#+}} ymm0 = ymm1 ^ (bst32 & (ymm0 ^ ymm1))
; CHECK-NEXT: retq
%a = call <16 x half> @llvm.copysign.v16f16(<16 x half> %x, <16 x half> %y)
ret <16 x half> %a
@@ -494,7 +494,7 @@ declare <32 x half> @llvm.fabs.v32f16(<32 x half>)
define <32 x half> @fcopysignv32f16(<32 x half> %x, <32 x half> %y) {
; CHECK-LABEL: fcopysignv32f16:
; CHECK: ## %bb.0:
-; CHECK-NEXT: vpternlogd {{.*#+}} zmm0 = zmm1 ^ (mem & (zmm0 ^ zmm1))
+; CHECK-NEXT: vpternlogd {{.*#+}} zmm0 = zmm1 ^ (bst32 & (zmm0 ^ zmm1))
; CHECK-NEXT: retq
%a = call <32 x half> @llvm.copysign.v32f16(<32 x half> %x, <32 x half> %y)
ret <32 x half> %a
diff --git a/llvm/test/CodeGen/X86/avx512vl-logic.ll b/llvm/test/CodeGen/X86/avx512vl-logic.ll
index 284a0eb33047c..cbf930fc46498 100644
--- a/llvm/test/CodeGen/X86/avx512vl-logic.ll
+++ b/llvm/test/CodeGen/X86/avx512vl-logic.ll
@@ -1039,7 +1039,7 @@ define <4 x i32> @ternlog_xor_andn(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
define <4 x i32> @ternlog_or_and_mask(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: ternlog_or_and_mask:
; CHECK: ## %bb.0:
-; CHECK-NEXT: vpternlogd {{.*#+}} xmm0 = (xmm0 & mem) | xmm1
+; CHECK-NEXT: vpternlogd {{.*#+}} xmm0 = (xmm0 & bst32) | xmm1
; CHECK-NEXT: retq
%a = and <4 x i32> %x, <i32 255, i32 255, i32 255, i32 255>
%b = or <4 x i32> %a, %y
@@ -1049,7 +1049,7 @@ define <4 x i32> @ternlog_or_and_mask(<4 x i32> %x, <4 x i32> %y) {
define <8 x i32> @ternlog_or_and_mask_ymm(<8 x i32> %x, <8 x i32> %y) {
; CHECK-LABEL: ternlog_or_and_mask_ymm:
; CHECK: ## %bb.0:
-; CHECK-NEXT: vpternlogd {{.*#+}} ymm0 = (ymm0 & mem) | ymm1
+; CHECK-NEXT: vpternlogd {{.*#+}} ymm0 = (ymm0 & bst32) | ymm1
; CHECK-NEXT: retq
%a = and <8 x i32> %x, <i32 -16777216, i32 -16777216, i32 -16777216, i32 -16777216, i32 -16777216, i32 -16777216, i32 -16777216, i32 -16777216>
%b = or <8 x i32> %a, %y
@@ -1059,7 +1059,7 @@ define <8 x i32> @ternlog_or_and_mask_ymm(<8 x i32> %x, <8 x i32> %y) {
define <2 x i64> @ternlog_xor_and_mask(<2 x i64> %x, <2 x i64> %y) {
; CHECK-LABEL: ternlog_xor_and_mask:
; CHECK: ## %bb.0:
-; CHECK-NEXT: vpternlogq {{.*#+}} xmm0 = xmm1 ^ (xmm0 & mem)
+; CHECK-NEXT: vpternlogq {{.*#+}} xmm0 = xmm1 ^ (xmm0 & bst64)
; CHECK-NEXT: retq
%a = and <2 x i64> %x, <i64 1099511627775, i64 1099511627775>
%b = xor <2 x i64> %a, %y
@@ -1069,7 +1069,7 @@ define <2 x i64> @ternlog_xor_and_mask(<2 x i64> %x, <2 x i64> %y) {
define <4 x i64> @ternlog_xor_and_mask_ymm(<4 x i64> %x, <4 x i64> %y) {
; CHECK-LABEL: ternlog_xor_and_mask_ymm:
; CHECK: ## %bb.0:
-; CHECK-NEXT: vpternlogq {{.*#+}} ymm0 = ymm1 ^ (ymm0 & mem)
+; CHECK-NEXT: vpternlogq {{.*#+}} ymm0 = ymm1 ^ (ymm0 & bst64)
; CHECK-NEXT: retq
%a = and <4 x i64> %x, <i64 72057594037927935, i64 72057594037927935, i64 72057594037927935, i64 72057...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/143721
More information about the llvm-commits mailing list