[llvm] [X86][FP16] Customize MLOAD/MSTORE(vXf16) if VLX is not enabled (PR #142331)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Jun 1 22:23:29 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-x86
Author: Phoebe Wang (phoebewang)
Changes
Fixes: https://godbolt.org/z/fa4z97xsY
---
Patch is 46.03 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/142331.diff
2 Files Affected:
- (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+7-6)
- (modified) llvm/test/CodeGen/X86/avx512fp16-mov.ll (+538-198)
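For context, here is a minimal sketch of the kind of IR this change affects; it is illustrative only (the function names and the <8 x half> width are not taken from the PR, and the exact godbolt case is not reproduced here). Compiled with `llc -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16` (no `+avx512vl`), the no-VLX path previously did not handle f16 element types for masked loads/stores (see the godbolt link); with this patch, v8f16/v16f16 MLOAD/MSTORE are marked Custom and widened to 512-bit masked ops, matching the new X64-NOVL check lines in the test diff below.

```llvm
; Hypothetical reproducer sketch: masked load/store of <8 x half> on a target
; with AVX512FP16 but without AVX512VL. With this patch these are widened to
; 512-bit masked ops (vmovdqu16 on zmm with a k-mask).
define <8 x half> @masked_load_v8f16(ptr %p, <8 x i1> %m, <8 x half> %passthru) {
  %v = call <8 x half> @llvm.masked.load.v8f16.p0(ptr %p, i32 2, <8 x i1> %m, <8 x half> %passthru)
  ret <8 x half> %v
}

define void @masked_store_v8f16(<8 x half> %v, ptr %p, <8 x i1> %m) {
  call void @llvm.masked.store.v8f16.p0(<8 x half> %v, ptr %p, i32 2, <8 x i1> %m)
  ret void
}

declare <8 x half> @llvm.masked.load.v8f16.p0(ptr, i32, <8 x i1>, <8 x half>)
declare void @llvm.masked.store.v8f16.p0(<8 x half>, ptr, i32, <8 x i1>)
```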
``````````diff
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index b1a3e3c006bb3..2beb697548553 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2238,7 +2238,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom);
setOperationAction(ISD::ANY_EXTEND, MVT::v32i8, Custom);
- for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) {
+ for (auto VT : {MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16,
+ MVT::v16f16, MVT::v8f16}) {
setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom);
setOperationAction(ISD::MSTORE, VT, Subtarget.hasVLX() ? Legal : Custom);
}
@@ -33192,8 +33193,8 @@ static SDValue LowerMLOAD(SDValue Op, const X86Subtarget &Subtarget,
"Cannot lower masked load op.");
assert((ScalarVT.getSizeInBits() >= 32 ||
- (Subtarget.hasBWI() &&
- (ScalarVT == MVT::i8 || ScalarVT == MVT::i16))) &&
+ (Subtarget.hasBWI() && (ScalarVT == MVT::i8 || ScalarVT == MVT::i16 ||
+ ScalarVT == MVT::f16))) &&
"Unsupported masked load op.");
// This operation is legal for targets with VLX, but without
@@ -33240,9 +33241,9 @@ static SDValue LowerMSTORE(SDValue Op, const X86Subtarget &Subtarget,
"Cannot lower masked store op.");
assert((ScalarVT.getSizeInBits() >= 32 ||
- (Subtarget.hasBWI() &&
- (ScalarVT == MVT::i8 || ScalarVT == MVT::i16))) &&
- "Unsupported masked store op.");
+ (Subtarget.hasBWI() && (ScalarVT == MVT::i8 || ScalarVT == MVT::i16 ||
+ ScalarVT == MVT::f16))) &&
+ "Unsupported masked store op.");
// This operation is legal for targets with VLX, but without
// VLX the vector should be widened to 512 bit
diff --git a/llvm/test/CodeGen/X86/avx512fp16-mov.ll b/llvm/test/CodeGen/X86/avx512fp16-mov.ll
index 82efaffe4014b..526511c850451 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-mov.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-mov.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64,X64VL
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512fp16,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16 | FileCheck %s --check-prefixes=CHECK,X64,X64-NOVL
define <8 x half> @broadcastph128(ptr %x) {
; X64-LABEL: broadcastph128:
@@ -314,31 +315,47 @@ define <8 x half> @test14(half %x) {
}
define <16 x half> @test14b(half %x) {
-; X64-LABEL: test14b:
-; X64: # %bb.0:
-; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; X64-NEXT: vmovsh %xmm0, %xmm1, %xmm0
-; X64-NEXT: retq
+; X64VL-LABEL: test14b:
+; X64VL: # %bb.0:
+; X64VL-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X64VL-NEXT: vmovsh %xmm0, %xmm1, %xmm0
+; X64VL-NEXT: retq
;
; X86-LABEL: test14b:
; X86: # %bb.0:
; X86-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
; X86-NEXT: retl
+;
+; X64-NOVL-LABEL: test14b:
+; X64-NOVL: # %bb.0:
+; X64-NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X64-NOVL-NEXT: vmovsh %xmm0, %xmm1, %xmm0
+; X64-NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X64-NOVL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; X64-NOVL-NEXT: retq
%res = insertelement <16 x half>zeroinitializer, half %x, i32 0
ret <16 x half>%res
}
define <32 x half> @test14c(half %x) {
-; X64-LABEL: test14c:
-; X64: # %bb.0:
-; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; X64-NEXT: vmovsh %xmm0, %xmm1, %xmm0
-; X64-NEXT: retq
+; X64VL-LABEL: test14c:
+; X64VL: # %bb.0:
+; X64VL-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X64VL-NEXT: vmovsh %xmm0, %xmm1, %xmm0
+; X64VL-NEXT: retq
;
; X86-LABEL: test14c:
; X86: # %bb.0:
; X86-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
; X86-NEXT: retl
+;
+; X64-NOVL-LABEL: test14c:
+; X64-NOVL: # %bb.0:
+; X64-NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X64-NOVL-NEXT: vmovsh %xmm0, %xmm1, %xmm0
+; X64-NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X64-NOVL-NEXT: vinsertf32x4 $0, %xmm0, %zmm1, %zmm0
+; X64-NOVL-NEXT: retq
%res = insertelement <32 x half>zeroinitializer, half %x, i32 0
ret <32 x half>%res
}
@@ -578,13 +595,13 @@ declare void @llvm.masked.store.v32f16.p0(<32 x half>, ptr, i32, <32 x i1>)
declare <32 x half> @llvm.masked.load.v32f16.p0(ptr, i32, <32 x i1>, <32 x half>)
define void @storeu32f16mask(<32 x i1> %mask, ptr %addr, <32 x half> %val) {
-; X64-LABEL: storeu32f16mask:
-; X64: # %bb.0:
-; X64-NEXT: vpsllw $7, %ymm0, %ymm0
-; X64-NEXT: vpmovb2m %ymm0, %k1
-; X64-NEXT: vmovdqu16 %zmm1, (%rdi) {%k1}
-; X64-NEXT: vzeroupper
-; X64-NEXT: retq
+; X64VL-LABEL: storeu32f16mask:
+; X64VL: # %bb.0:
+; X64VL-NEXT: vpsllw $7, %ymm0, %ymm0
+; X64VL-NEXT: vpmovb2m %ymm0, %k1
+; X64VL-NEXT: vmovdqu16 %zmm1, (%rdi) {%k1}
+; X64VL-NEXT: vzeroupper
+; X64VL-NEXT: retq
;
; X86-LABEL: storeu32f16mask:
; X86: # %bb.0:
@@ -594,17 +611,25 @@ define void @storeu32f16mask(<32 x i1> %mask, ptr %addr, <32 x half> %val) {
; X86-NEXT: vmovdqu16 %zmm1, (%eax) {%k1}
; X86-NEXT: vzeroupper
; X86-NEXT: retl
+;
+; X64-NOVL-LABEL: storeu32f16mask:
+; X64-NOVL: # %bb.0:
+; X64-NOVL-NEXT: vpsllw $7, %ymm0, %ymm0
+; X64-NOVL-NEXT: vpmovb2m %zmm0, %k1
+; X64-NOVL-NEXT: vmovdqu16 %zmm1, (%rdi) {%k1}
+; X64-NOVL-NEXT: vzeroupper
+; X64-NOVL-NEXT: retq
call void @llvm.masked.store.v32f16.p0(<32 x half> %val, ptr %addr, i32 4, <32 x i1>%mask)
ret void
}
define <32 x half> @maskloadu32f16(ptr %addr, <32 x half> %val, <32 x i1> %mask) {
-; X64-LABEL: maskloadu32f16:
-; X64: # %bb.0:
-; X64-NEXT: vpsllw $7, %ymm1, %ymm1
-; X64-NEXT: vpmovb2m %ymm1, %k1
-; X64-NEXT: vmovdqu16 (%rdi), %zmm0 {%k1}
-; X64-NEXT: retq
+; X64VL-LABEL: maskloadu32f16:
+; X64VL: # %bb.0:
+; X64VL-NEXT: vpsllw $7, %ymm1, %ymm1
+; X64VL-NEXT: vpmovb2m %ymm1, %k1
+; X64VL-NEXT: vmovdqu16 (%rdi), %zmm0 {%k1}
+; X64VL-NEXT: retq
;
; X86-LABEL: maskloadu32f16:
; X86: # %bb.0:
@@ -613,17 +638,24 @@ define <32 x half> @maskloadu32f16(ptr %addr, <32 x half> %val, <32 x i1> %mask)
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vmovdqu16 (%eax), %zmm0 {%k1}
; X86-NEXT: retl
+;
+; X64-NOVL-LABEL: maskloadu32f16:
+; X64-NOVL: # %bb.0:
+; X64-NOVL-NEXT: vpsllw $7, %ymm1, %ymm1
+; X64-NOVL-NEXT: vpmovb2m %zmm1, %k1
+; X64-NOVL-NEXT: vmovdqu16 (%rdi), %zmm0 {%k1}
+; X64-NOVL-NEXT: retq
%res = call <32 x half> @llvm.masked.load.v32f16.p0(ptr %addr, i32 4, <32 x i1> %mask, <32 x half> %val)
ret <32 x half> %res
}
define <32 x half> @maskuloadu32f16(ptr %addr, <32 x i1> %mask) {
-; X64-LABEL: maskuloadu32f16:
-; X64: # %bb.0:
-; X64-NEXT: vpsllw $7, %ymm0, %ymm0
-; X64-NEXT: vpmovb2m %ymm0, %k1
-; X64-NEXT: vmovdqu16 (%rdi), %zmm0 {%k1} {z}
-; X64-NEXT: retq
+; X64VL-LABEL: maskuloadu32f16:
+; X64VL: # %bb.0:
+; X64VL-NEXT: vpsllw $7, %ymm0, %ymm0
+; X64VL-NEXT: vpmovb2m %ymm0, %k1
+; X64VL-NEXT: vmovdqu16 (%rdi), %zmm0 {%k1} {z}
+; X64VL-NEXT: retq
;
; X86-LABEL: maskuloadu32f16:
; X86: # %bb.0:
@@ -632,17 +664,24 @@ define <32 x half> @maskuloadu32f16(ptr %addr, <32 x i1> %mask) {
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vmovdqu16 (%eax), %zmm0 {%k1} {z}
; X86-NEXT: retl
+;
+; X64-NOVL-LABEL: maskuloadu32f16:
+; X64-NOVL: # %bb.0:
+; X64-NOVL-NEXT: vpsllw $7, %ymm0, %ymm0
+; X64-NOVL-NEXT: vpmovb2m %zmm0, %k1
+; X64-NOVL-NEXT: vmovdqu16 (%rdi), %zmm0 {%k1} {z}
+; X64-NOVL-NEXT: retq
%res = call <32 x half> @llvm.masked.load.v32f16.p0(ptr %addr, i32 4, <32 x i1> %mask, <32 x half> undef)
ret <32 x half> %res
}
define <32 x half> @maskzloadu32f16(ptr %addr, <32 x i1> %mask) {
-; X64-LABEL: maskzloadu32f16:
-; X64: # %bb.0:
-; X64-NEXT: vpsllw $7, %ymm0, %ymm0
-; X64-NEXT: vpmovb2m %ymm0, %k1
-; X64-NEXT: vmovdqu16 (%rdi), %zmm0 {%k1} {z}
-; X64-NEXT: retq
+; X64VL-LABEL: maskzloadu32f16:
+; X64VL: # %bb.0:
+; X64VL-NEXT: vpsllw $7, %ymm0, %ymm0
+; X64VL-NEXT: vpmovb2m %ymm0, %k1
+; X64VL-NEXT: vmovdqu16 (%rdi), %zmm0 {%k1} {z}
+; X64VL-NEXT: retq
;
; X86-LABEL: maskzloadu32f16:
; X86: # %bb.0:
@@ -651,6 +690,13 @@ define <32 x half> @maskzloadu32f16(ptr %addr, <32 x i1> %mask) {
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vmovdqu16 (%eax), %zmm0 {%k1} {z}
; X86-NEXT: retl
+;
+; X64-NOVL-LABEL: maskzloadu32f16:
+; X64-NOVL: # %bb.0:
+; X64-NOVL-NEXT: vpsllw $7, %ymm0, %ymm0
+; X64-NOVL-NEXT: vpmovb2m %zmm0, %k1
+; X64-NOVL-NEXT: vmovdqu16 (%rdi), %zmm0 {%k1} {z}
+; X64-NOVL-NEXT: retq
%res = call <32 x half> @llvm.masked.load.v32f16.p0(ptr %addr, i32 4, <32 x i1> %mask, <32 x half> zeroinitializer)
ret <32 x half> %res
}
@@ -713,11 +759,11 @@ define <16 x half> @load16f16(ptr %a) {
}
define <16 x half> @load16f16mask(ptr %a, <16 x half> %b, i16 %c) {
-; X64-LABEL: load16f16mask:
-; X64: # %bb.0:
-; X64-NEXT: kmovd %esi, %k1
-; X64-NEXT: vmovdqu16 (%rdi), %ymm0 {%k1}
-; X64-NEXT: retq
+; X64VL-LABEL: load16f16mask:
+; X64VL: # %bb.0:
+; X64VL-NEXT: kmovd %esi, %k1
+; X64VL-NEXT: vmovdqu16 (%rdi), %ymm0 {%k1}
+; X64VL-NEXT: retq
;
; X86-LABEL: load16f16mask:
; X86: # %bb.0:
@@ -725,6 +771,15 @@ define <16 x half> @load16f16mask(ptr %a, <16 x half> %b, i16 %c) {
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT: vmovdqu16 (%eax), %ymm0 {%k1}
; X86-NEXT: retl
+;
+; X64-NOVL-LABEL: load16f16mask:
+; X64-NOVL: # %bb.0:
+; X64-NOVL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; X64-NOVL-NEXT: kmovd %esi, %k1
+; X64-NOVL-NEXT: vmovdqa (%rdi), %ymm1
+; X64-NOVL-NEXT: vmovdqu16 %zmm1, %zmm0 {%k1}
+; X64-NOVL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
+; X64-NOVL-NEXT: retq
%msk = bitcast i16 %c to <16 x i1>
%res0 = load <16 x half>, ptr %a
%res = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %b
@@ -732,11 +787,11 @@ define <16 x half> @load16f16mask(ptr %a, <16 x half> %b, i16 %c) {
}
define <16 x half> @load16f16maskz(ptr %a, i16 %c) {
-; X64-LABEL: load16f16maskz:
-; X64: # %bb.0:
-; X64-NEXT: kmovd %esi, %k1
-; X64-NEXT: vmovdqu16 (%rdi), %ymm0 {%k1} {z}
-; X64-NEXT: retq
+; X64VL-LABEL: load16f16maskz:
+; X64VL: # %bb.0:
+; X64VL-NEXT: kmovd %esi, %k1
+; X64VL-NEXT: vmovdqu16 (%rdi), %ymm0 {%k1} {z}
+; X64VL-NEXT: retq
;
; X86-LABEL: load16f16maskz:
; X86: # %bb.0:
@@ -744,6 +799,14 @@ define <16 x half> @load16f16maskz(ptr %a, i16 %c) {
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT: vmovdqu16 (%eax), %ymm0 {%k1} {z}
; X86-NEXT: retl
+;
+; X64-NOVL-LABEL: load16f16maskz:
+; X64-NOVL: # %bb.0:
+; X64-NOVL-NEXT: kmovd %esi, %k1
+; X64-NOVL-NEXT: vmovdqa (%rdi), %ymm0
+; X64-NOVL-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
+; X64-NOVL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
+; X64-NOVL-NEXT: retq
%msk = bitcast i16 %c to <16 x i1>
%res0 = load <16 x half>, ptr %a
%res = select <16 x i1> %msk, <16 x half> %res0, <16 x half> zeroinitializer
@@ -766,11 +829,11 @@ define <16 x half> @loadu16f16(ptr %a) {
}
define <16 x half> @loadu16f16mask(ptr %a, <16 x half> %b, i16 %c) {
-; X64-LABEL: loadu16f16mask:
-; X64: # %bb.0:
-; X64-NEXT: kmovd %esi, %k1
-; X64-NEXT: vmovdqu16 (%rdi), %ymm0 {%k1}
-; X64-NEXT: retq
+; X64VL-LABEL: loadu16f16mask:
+; X64VL: # %bb.0:
+; X64VL-NEXT: kmovd %esi, %k1
+; X64VL-NEXT: vmovdqu16 (%rdi), %ymm0 {%k1}
+; X64VL-NEXT: retq
;
; X86-LABEL: loadu16f16mask:
; X86: # %bb.0:
@@ -778,6 +841,15 @@ define <16 x half> @loadu16f16mask(ptr %a, <16 x half> %b, i16 %c) {
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT: vmovdqu16 (%eax), %ymm0 {%k1}
; X86-NEXT: retl
+;
+; X64-NOVL-LABEL: loadu16f16mask:
+; X64-NOVL: # %bb.0:
+; X64-NOVL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; X64-NOVL-NEXT: kmovd %esi, %k1
+; X64-NOVL-NEXT: vmovdqu (%rdi), %ymm1
+; X64-NOVL-NEXT: vmovdqu16 %zmm1, %zmm0 {%k1}
+; X64-NOVL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
+; X64-NOVL-NEXT: retq
%msk = bitcast i16 %c to <16 x i1>
%res0 = load <16 x half>, ptr %a, align 8
%res = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %b
@@ -785,11 +857,11 @@ define <16 x half> @loadu16f16mask(ptr %a, <16 x half> %b, i16 %c) {
}
define <16 x half> @loadu16f16maskz(ptr %a, i16 %c) {
-; X64-LABEL: loadu16f16maskz:
-; X64: # %bb.0:
-; X64-NEXT: kmovd %esi, %k1
-; X64-NEXT: vmovdqu16 (%rdi), %ymm0 {%k1} {z}
-; X64-NEXT: retq
+; X64VL-LABEL: loadu16f16maskz:
+; X64VL: # %bb.0:
+; X64VL-NEXT: kmovd %esi, %k1
+; X64VL-NEXT: vmovdqu16 (%rdi), %ymm0 {%k1} {z}
+; X64VL-NEXT: retq
;
; X86-LABEL: loadu16f16maskz:
; X86: # %bb.0:
@@ -797,6 +869,14 @@ define <16 x half> @loadu16f16maskz(ptr %a, i16 %c) {
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT: vmovdqu16 (%eax), %ymm0 {%k1} {z}
; X86-NEXT: retl
+;
+; X64-NOVL-LABEL: loadu16f16maskz:
+; X64-NOVL: # %bb.0:
+; X64-NOVL-NEXT: kmovd %esi, %k1
+; X64-NOVL-NEXT: vmovdqu (%rdi), %ymm0
+; X64-NOVL-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
+; X64-NOVL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
+; X64-NOVL-NEXT: retq
%msk = bitcast i16 %c to <16 x i1>
%res0 = load <16 x half>, ptr %a, align 8
%res = select <16 x i1> %msk, <16 x half> %res0, <16 x half> zeroinitializer
@@ -841,13 +921,13 @@ declare void @llvm.masked.store.v16f16.p0(<16 x half>, ptr, i32, <16 x i1>)
declare <16 x half> @llvm.masked.load.v16f16.p0(ptr, i32, <16 x i1>, <16 x half>)
define void @storeu16f16mask(<16 x i1> %mask, ptr %addr, <16 x half> %val) {
-; X64-LABEL: storeu16f16mask:
-; X64: # %bb.0:
-; X64-NEXT: vpsllw $7, %xmm0, %xmm0
-; X64-NEXT: vpmovb2m %xmm0, %k1
-; X64-NEXT: vmovdqu16 %ymm1, (%rdi) {%k1}
-; X64-NEXT: vzeroupper
-; X64-NEXT: retq
+; X64VL-LABEL: storeu16f16mask:
+; X64VL: # %bb.0:
+; X64VL-NEXT: vpsllw $7, %xmm0, %xmm0
+; X64VL-NEXT: vpmovb2m %xmm0, %k1
+; X64VL-NEXT: vmovdqu16 %ymm1, (%rdi) {%k1}
+; X64VL-NEXT: vzeroupper
+; X64VL-NEXT: retq
;
; X86-LABEL: storeu16f16mask:
; X86: # %bb.0:
@@ -857,17 +937,27 @@ define void @storeu16f16mask(<16 x i1> %mask, ptr %addr, <16 x half> %val) {
; X86-NEXT: vmovdqu16 %ymm1, (%eax) {%k1}
; X86-NEXT: vzeroupper
; X86-NEXT: retl
+;
+; X64-NOVL-LABEL: storeu16f16mask:
+; X64-NOVL: # %bb.0:
+; X64-NOVL-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
+; X64-NOVL-NEXT: vpsllw $7, %xmm0, %xmm0
+; X64-NOVL-NEXT: vpmovb2m %zmm0, %k0
+; X64-NOVL-NEXT: kmovw %k0, %k1
+; X64-NOVL-NEXT: vmovdqu16 %zmm1, (%rdi) {%k1}
+; X64-NOVL-NEXT: vzeroupper
+; X64-NOVL-NEXT: retq
call void @llvm.masked.store.v16f16.p0(<16 x half> %val, ptr %addr, i32 4, <16 x i1>%mask)
ret void
}
define <16 x half> @maskloadu16f16(ptr %addr, <16 x half> %val, <16 x i1> %mask) {
-; X64-LABEL: maskloadu16f16:
-; X64: # %bb.0:
-; X64-NEXT: vpsllw $7, %xmm1, %xmm1
-; X64-NEXT: vpmovb2m %xmm1, %k1
-; X64-NEXT: vmovdqu16 (%rdi), %ymm0 {%k1}
-; X64-NEXT: retq
+; X64VL-LABEL: maskloadu16f16:
+; X64VL: # %bb.0:
+; X64VL-NEXT: vpsllw $7, %xmm1, %xmm1
+; X64VL-NEXT: vpmovb2m %xmm1, %k1
+; X64VL-NEXT: vmovdqu16 (%rdi), %ymm0 {%k1}
+; X64VL-NEXT: retq
;
; X86-LABEL: maskloadu16f16:
; X86: # %bb.0:
@@ -876,17 +966,27 @@ define <16 x half> @maskloadu16f16(ptr %addr, <16 x half> %val, <16 x i1> %mask)
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vmovdqu16 (%eax), %ymm0 {%k1}
; X86-NEXT: retl
+;
+; X64-NOVL-LABEL: maskloadu16f16:
+; X64-NOVL: # %bb.0:
+; X64-NOVL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; X64-NOVL-NEXT: vpsllw $7, %xmm1, %xmm1
+; X64-NOVL-NEXT: vpmovb2m %zmm1, %k0
+; X64-NOVL-NEXT: kmovw %k0, %k1
+; X64-NOVL-NEXT: vmovdqu16 (%rdi), %zmm0 {%k1}
+; X64-NOVL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
+; X64-NOVL-NEXT: retq
%res = call <16 x half> @llvm.masked.load.v16f16.p0(ptr %addr, i32 4, <16 x i1> %mask, <16 x half> %val)
ret <16 x half> %res
}
define <16 x half> @maskuloadu16f16(ptr %addr, <16 x i1> %mask) {
-; X64-LABEL: maskuloadu16f16:
-; X64: # %bb.0:
-; X64-NEXT: vpsllw $7, %xmm0, %xmm0
-; X64-NEXT: vpmovb2m %xmm0, %k1
-; X64-NEXT: vmovdqu16 (%rdi), %ymm0 {%k1} {z}
-; X64-NEXT: retq
+; X64VL-LABEL: maskuloadu16f16:
+; X64VL: # %bb.0:
+; X64VL-NEXT: vpsllw $7, %xmm0, %xmm0
+; X64VL-NEXT: vpmovb2m %xmm0, %k1
+; X64VL-NEXT: vmovdqu16 (%rdi), %ymm0 {%k1} {z}
+; X64VL-NEXT: retq
;
; X86-LABEL: maskuloadu16f16:
; X86: # %bb.0:
@@ -895,17 +995,26 @@ define <16 x half> @maskuloadu16f16(ptr %addr, <16 x i1> %mask) {
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vmovdqu16 (%eax), %ymm0 {%k1} {z}
; X86-NEXT: retl
+;
+; X64-NOVL-LABEL: maskuloadu16f16:
+; X64-NOVL: # %bb.0:
+; X64-NOVL-NEXT: vpsllw $7, %xmm0, %xmm0
+; X64-NOVL-NEXT: vpmovb2m %zmm0, %k0
+; X64-NOVL-NEXT: kmovw %k0, %k1
+; X64-NOVL-NEXT: vmovdqu16 (%rdi), %zmm0 {%k1} {z}
+; X64-NOVL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
+; X64-NOVL-NEXT: retq
%res = call <16 x half> @llvm.masked.load.v16f16.p0(ptr %addr, i32 4, <16 x i1> %mask, <16 x half> undef)
ret <16 x half> %res
}
define <16 x half> @maskzloadu16f16(ptr %addr, <16 x i1> %mask) {
-; X64-LABEL: maskzloadu16f16:
-; X64: # %bb.0:
-; X64-NEXT: vpsllw $7, %xmm0, %xmm0
-; X64-NEXT: vpmovb2m %xmm0, %k1
-; X64-NEXT: vmovdqu16 (%rdi), %ymm0 {%k1} {z}
-; X64-NEXT: retq
+; X64VL-LABEL: maskzloadu16f16:
+; X64VL: # %bb.0:
+; X64VL-NEXT: vpsllw $7, %xmm0, %xmm0
+; X64VL-NEXT: vpmovb2m %xmm0, %k1
+; X64VL-NEXT: vmovdqu16 (%rdi), %ymm0 {%k1} {z}
+; X64VL-NEXT: retq
;
; X86-LABEL: maskzloadu16f16:
; X86: # %bb.0:
@@ -914,6 +1023,15 @@ define <16 x half> @maskzloadu16f16(ptr %addr, <16 x i1> %mask) {
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vmovdqu16 (%eax), %ymm0 {%k1} {z}
; X86-NEXT: retl
+;
+; X64-NOVL-LABEL: maskzloadu16f16:
+; X64-NOVL: # %bb.0:
+; X64-NOVL-NEXT: vpsllw $7, %xmm0, %xmm0
+; X64-NOVL-NEXT: vpmovb2m %zmm0, %k0
+; X64-NOVL-NEXT: kmovw %k0, %k1
+; X64-NOVL-NEXT: vmovdqu16 (%rdi), %zmm0 {%k1} {z}
+; X64-NOVL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
+; X64-NOVL-NEXT: retq
%res = call <16 x half> @llvm.masked.load.v16f16.p0(ptr %addr, i32 4, <16 x i1> %mask, <16 x half> zeroinitializer)
ret <16 x half> %res
}
@@ -927,34 +1045,51 @@ define <16 x half> @movrr16f16(<16 x half> %a, <16 x half> %b) {
}
define <16 x half> @movrrk16f16(<16 x half> %a, <16 x half> %b, i16 %msk) {
-; X64-LABEL: movrrk16f16:
-; X64: # %bb.0:
-; X64-NEXT: kmovd %edi, %k1
-; X64-NEXT: vpblendmw %ymm0, %ymm1, %ymm0 {%k1}
-; X64-NEXT: retq
+; X64VL-LABEL: movrrk16f16:
+; X64VL: # %bb.0:
+; X64VL-NEXT: kmovd %edi, %k1
+; X64VL-NEXT: vpblendmw %ymm0, %ymm1, %ymm0 {%k1}
+; X64VL-NEXT: retq
;
; X86-LABEL: movrrk16f16:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpblendmw %ymm0, %ymm1, %ymm0 {%k1}
; X86-NEXT: retl
+;
+; X64-NOVL-LABEL: movrrk16f16:
+; X64-NOVL: # %bb.0:
+; X64-NOVL-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
+; X64-NOVL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; X64-NOVL-NEXT: kmovd %edi, %k1
+; X64-NOVL-NEXT: vpblendmw %zmm0, %zmm1, %zmm0 {%k1}
+; X64-NOVL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
+; X64-NOVL-NEXT: retq
%mask = bitcast i16 %msk to <16 x i1>
%res = select <16 x i1> %mask, <16 x half> %a, <16 x half> %b
...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/142331
More information about the llvm-commits mailing list