<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Issue</th>
<td>
<a href=https://github.com/llvm/llvm-project/issues/93000>93000</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>
[AVX-512] Prefer `knotd` over `not+kmovd` when the mask is already in a k register
</td>
</tr>
<tr>
<th>Labels</th>
<td>
new issue
</td>
</tr>
<tr>
<th>Assignees</th>
<td>
</td>
</tr>
<tr>
<th>Reporter</th>
<td>
Validark
</td>
</tr>
</table>
<pre>
```zig
const std = @import("std");
// Generic wrapper around the LLVM intrinsic named "llvm.x86.avx512.mask.expand.w".
// `a` and `b` share one vector type, which is also the return type. `n` is an unsigned
// integer whose bit width equals the vector length of `a` (one bit per lane).
// NOTE(review): judging by the IR and assembly quoted later in this report, `a` is the
// data operand, `b` the pass-through operand and `n` the lane mask; confirm against the
// intrinsic definition.
fn vpexpand(a: anytype, b: @TypeOf(a), n: std.meta.Int(.unsigned, @typeInfo(@TypeOf(a)).Vector.len)) @TypeOf(a) {
// The extern is declared inside an anonymous struct, with its parameter and return types
// spelled in terms of @TypeOf(a), and is then called immediately.
return struct {
extern fn @"llvm.x86.avx512.mask.expand.w"(@TypeOf(a), @TypeOf(a), std.meta.Int(.unsigned, @typeInfo(@TypeOf(a)).Vector.len)) @TypeOf(a);
}.@"llvm.x86.avx512.mask.expand.w"(a, b, n);
}
// Reproducer: walks the zero-terminated u32 array `x`, using each element as a 32-bit lane
// mask. Every iteration writes a full 32 x u16 vector to `dest`, overwriting the previous one.
export fn foo_znver4(x: [*:0]u32, dest: *[32]u16, a: @Vector(32, u16), b: @Vector(32, u16)) void {
var cur = x[0..];
// Do-while shape: the first element is consumed before any check against the 0 sentinel.
while (true) {
const n = cur[0];
// Inner call: expand `a` under mask n with an all-zero second operand.
// Outer call: expand `b` under the complementary mask ~n, with the inner result as the
// second operand. The n / ~n pair is what the report's codegen suggestion is about.
dest.* = vpexpand(b, vpexpand(a, @splat(0), n), ~n);
cur = cur[1..];
// Stop once the next element is the 0 sentinel.
if (cur[0] == 0) break;
}
}
```
LLVM IR:
```llvm
; IR quoted in the report for the Zig function foo_znver4 above.
; %0 = pointer to the i32 masks, %1 = destination pointer, %2 and %3 = the two 32 x i16 inputs.
define dso_local void @foo_znver4(ptr nocapture nonnull readonly align 4 %0, ptr nocapture nonnull writeonly align 2 %1, <32 x i16> %2, <32 x i16> %3) local_unnamed_addr {
Entry:
; The first mask is loaded before the loop; later masks are loaded at the bottom of the loop.
%.pre = load i32, ptr %0, align 4
; Address of the second mask (byte offset 4 from %0).
%scevgep = getelementptr i8, ptr %0, i64 4
br label %Loop
Loop:
; %lsr.iv = address of the next mask to load; %4 = mask used by this iteration.
%lsr.iv = phi ptr [ %scevgep5, %Loop ], [ %scevgep, %Entry ]
%4 = phi i32 [ %.pre, %Entry ], [ %10, %Loop ]
; Reinterpret the 32-bit integer as a 32 x i1 lane mask.
%5 = bitcast i32 %4 to <32 x i1>
; First expand: data %2, zero second operand, mask %5.
%6 = tail call <32 x i16> @llvm.x86.avx512.mask.expand.v32i16(<32 x i16> %2, <32 x i16> zeroinitializer, <32 x i1> %5)
; Bitwise NOT of the mask, done on the scalar integer before the bitcast.
; NOTE(review): presumably this is what becomes the `not eax` + second `kmovd` in the
; assembly listing of this report.
%7 = xor i32 %4, -1
%8 = bitcast i32 %7 to <32 x i1>
; Second expand: data %3, second operand is the first expand's result, mask is the inverted mask.
%9 = tail call <32 x i16> @llvm.x86.avx512.mask.expand.v32i16(<32 x i16> %3, <32 x i16> %6, <32 x i1> %8)
store <32 x i16> %9, ptr %1, align 2
; Load the next mask; exit when it is zero, otherwise advance the pointer and loop.
%10 = load i32, ptr %lsr.iv, align 4
%11 = icmp eq i32 %10, 0
%scevgep5 = getelementptr i8, ptr %lsr.iv, i64 4
br i1 %11, label %Then, label %Loop
Then:
ret void
}
declare void @llvm.dbg.value(metadata, metadata, metadata) #1
declare <32 x i16> @llvm.x86.avx512.mask.expand.v32i16(<32 x i16>, <32 x i16>, <32 x i1>) #2
```
Current Zen 4 (znver4) codegen, with the suggested improvement shown as a diff:
```diff
foo_znver4:
mov eax, dword ptr [rdi]
add rdi, 4
.LBB0_1:
kmovd k1, eax
- not eax
vpexpandw zmm2 {k1} {z}, zmm0
- kmovd k1, eax
+ knotd k1, k1
vpexpandw zmm2 {k1}, zmm1
vmovdqu64 zmmword ptr [rsi], zmm2
mov eax, dword ptr [rdi]
add rdi, 4
test eax, eax
jne .LBB0_1
vzeroupper
ret
```
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJy0V1-P4jYQ_zTmxdoomSRAHniA5VY6aatW1WlV9WVl4gF8JHbOcVjYh372ahwCCbDXq9pDqw3x_J_fb2wj6lptNOKMpQuWLkeicVtjZy-iUFLY3Whl5HHGxmH79642LFyycJ4bXTteO8lZvOQsCVVZGesYTBlA7SQDYJCxeNGqt__Xmu8rPFRCSwZTweI5F_rojhUyeOQremdJ-OVY4a9rrwAZCTQJaieDEp0IPmuKEjTa5y1JgSUhOfms14YSuHGRBS-YO2ODAnW7cBOHs8kpVW7RNVbz2tkmd7310wcPDq3ma00-GEBR7MvgMB0HYn9IIwhKUe-CtsjgzbfhTkaPdwv9yTWe0eBssgx-OHvRguOB6EM6WfaxxQPhT21ZG_P6rvdoEwbTgwc1XTCYs3gesnTZxEC-JNbOy2DO0kUMJInGJBEnHrT1MJi2Bl6a9YnygULG90bJIXB7YXneWE_WA0sXYRCwdHlFT1J826oCOYOpsw0OedFSXnsfeWPJS99Hxw8qLGAw93o9uvsODujfolpXhSC0wzPb2-dfg2533rsi2gSiYRmdklpTBZccyYKMKARfWRS7ARWu8eyGvd-b5-eXX_jn31k876-eVYlG7ZLEtdLIZW1eC5OL4gRGEg5oUTnLtclF5RqLXButm6LgFoU0ujhyUaiN5glnkIbUi_vqb1Y57OkD6Ue-rfFjDPzAVTRm8SdahvvLMXXE5_naaC1KlK9CSntB_ZN29niumpNNUFn0GBRGSK5a8lGGXban7HsmdY77DVbeaoMOCyxRO7JR02trNU4utivLC7HCgqTPxlQDSOh9kFlR20DtfZRqq1qv6aKXQOq70LrixBx6HWicFHzZXuPiPTk7VjF0ZtSMG5uL1yi8jnjxl3p_K-VyUbvWJwVxpocTiz_1LMbewglV8FwUxQ2eSfi97WwfAynC9MfY8Y7WKK2cEoV6RzvUONmlNKKX_Cbt_mLsuRqyeoh6KtN7RU--V3T2s4qO74_E-G6l016ltTN-Aq4tsx6Vo8sgQK-YKPxoclru3h-fKPJWKi8rjt-6rrXcCm_HLP2nObvEuh42FbXxSHYevC9bOlIfP55Er3CZRIvO73p3z0mJeSEsnrdFD55cbYK9KOjEmdIFQArnj4f73zPOII7uOf1fuHFLixtGnHKA7xwYfyJt4Fgqx4WWvG42G6wdSq7Kypq9B-aj00Sq9fp0Y7wcGvF8eMiVZt_ex8TB3yfejJXdlmel6u017UdI6Z8kg8cO9eB5sQhfoxvvu9LsSX_nqUAxvPiBa-POYQcW3dH-dnp_L0ugc2QXscmSvrwTE-CRBGHn7YMwDBZdHtq4i8Iu-hcxT7GuTSjit2acXEwGnavVaQcnX7cd_2_d7qQOa9fD7qaXXzX6Z4fOsALamZuqQjtct-iueDSSs1hmcSZGOIsm0TgewwTS0XaGEE-y8QqzLIuydTpOZBJHYQwIeZ7miRypGYSQhClAOIEU0mAFeTyFSS7XMpeAIUtCLIUqAj9gxm5Gqq4bnGVxGIYjv1PU_icVgMY37oV0nU6XIzsjm4dVs6lpQFXt6osXp1zhf4vNX_54SCMy4L9ZXKPlbBx6NrBxyM2-XdDGMVh4FtHy2xY1FwXdo45cab7jFjeqdmhHjS1mW-eqmqgOTwyeNsptm1WQm5LBk7_AtY-HypqvmDsGTz7tmsGTL-vvAAAA__9hWtMe">