[llvm] 58d4fe2 - [X86][EVEX512] Do not allow 512-bit memcpy without EVEX512 (#70420)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 27 00:26:10 PDT 2023
Author: Phoebe Wang
Date: 2023-10-27T15:26:05+08:00
New Revision: 58d4fe287e02dab99eec282917c67abbb36fc3e4
URL: https://github.com/llvm/llvm-project/commit/58d4fe287e02dab99eec282917c67abbb36fc3e4
DIFF: https://github.com/llvm/llvm-project/commit/58d4fe287e02dab99eec282917c67abbb36fc3e4.diff
LOG: [X86][EVEX512] Do not allow 512-bit memcpy without EVEX512 (#70420)
Solves the crash mentioned in #65920.
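For context: with a feature string like avx512f,-evex512, AVX-512 instructions are restricted to 256-bit (YMM and narrower) registers, so 512-bit vector types are not legal. The hooks patched below still advertised 512-bit support based on hasAVX512() alone, and the backend crashed when it tried to materialize ZMM memory operations. The common fix is to gate every 512-bit decision on hasEVEX512() as well. A minimal sketch of the pattern (SubtargetBits and canUse512BitVectors are illustrative stand-ins, not the actual X86Subtarget API):

struct SubtargetBits {
  bool HasAVX512 = false;  // AVX-512 instruction set available
  bool HasEVEX512 = false; // 512-bit (ZMM) register width available
};

// The combined test every 512-bit code path now uses: AVX-512 alone is
// no longer sufficient once EVEX512 has been subtracted from the features.
inline bool canUse512BitVectors(const SubtargetBits &ST) {
  return ST.HasAVX512 && ST.HasEVEX512;
}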
Added:
llvm/test/CodeGen/X86/evex512-mem.ll
Modified:
llvm/lib/Target/X86/X86ISelLoweringCall.cpp
llvm/lib/Target/X86/X86TargetTransformInfo.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
index c47ddae072b4fe8..2fe145f9267de87 100644
--- a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
+++ b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
@@ -281,7 +281,7 @@ EVT X86TargetLowering::getOptimalMemOpType(
if (Op.size() >= 16 &&
(!Subtarget.isUnalignedMem16Slow() || Op.isAligned(Align(16)))) {
// FIXME: Check if unaligned 64-byte accesses are slow.
- if (Op.size() >= 64 && Subtarget.hasAVX512() &&
+ if (Op.size() >= 64 && Subtarget.hasAVX512() && Subtarget.hasEVEX512() &&
(Subtarget.getPreferVectorWidth() >= 512)) {
return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32;
}
@@ -395,7 +395,7 @@ bool X86TargetLowering::allowsMemoryAccess(LLVMContext &Context,
return true;
return false;
case 512:
- if (Subtarget.hasAVX512())
+ if (Subtarget.hasAVX512() && Subtarget.hasEVEX512())
return true;
return false;
default:
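The first hunk above sits in a width-selection cascade: getOptimalMemOpType picks the widest vector type that is both legal and preferred for the copy size. A simplified sketch of that cascade under this patch's assumptions (CopyVT and pickMemcpyType are hypothetical names, not the LLVM source; the real hook returns an EVT):

enum class CopyVT { v64i8, v16i32, v32i8, v16i8, Scalar };

// Simplified width-selection cascade for memcpy lowering: the widest
// legal-and-preferred type wins. The 512-bit branch is the one this
// patch tightens with the EVEX512 check.
CopyVT pickMemcpyType(unsigned Size, bool AVX512, bool EVEX512, bool BWI,
                      bool AVX, unsigned PreferVectorWidth) {
  if (Size >= 64 && AVX512 && EVEX512 && PreferVectorWidth >= 512)
    return BWI ? CopyVT::v64i8 : CopyVT::v16i32; // one ZMM load/store pair
  if (Size >= 32 && AVX && PreferVectorWidth >= 256)
    return CopyVT::v32i8;                        // 256-bit YMM ops
  if (Size >= 16)
    return CopyVT::v16i8;                        // 128-bit XMM ops
  return CopyVT::Scalar;                         // plain GPR moves
}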
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 884f22b006bcba6..8a04987e768a126 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -180,7 +180,7 @@ X86TTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
case TargetTransformInfo::RGK_Scalar:
return TypeSize::getFixed(ST->is64Bit() ? 64 : 32);
case TargetTransformInfo::RGK_FixedWidthVector:
- if (ST->hasAVX512() && PreferVectorWidth >= 512)
+ if (ST->hasAVX512() && ST->hasEVEX512() && PreferVectorWidth >= 512)
return TypeSize::getFixed(512);
if (ST->hasAVX() && PreferVectorWidth >= 256)
return TypeSize::getFixed(256);
@@ -6131,7 +6131,8 @@ X86TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
// Only enable vector loads for equality comparison. Right now the vector
// version is not as fast for three way compare (see #33329).
const unsigned PreferredWidth = ST->getPreferVectorWidth();
- if (PreferredWidth >= 512 && ST->hasAVX512()) Options.LoadSizes.push_back(64);
+ if (PreferredWidth >= 512 && ST->hasAVX512() && ST->hasEVEX512())
+ Options.LoadSizes.push_back(64);
if (PreferredWidth >= 256 && ST->hasAVX()) Options.LoadSizes.push_back(32);
if (PreferredWidth >= 128 && ST->hasSSE2()) Options.LoadSizes.push_back(16);
}
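The enableMemCmpExpansion change matters because Options.LoadSizes drives how an equality memcmp is split into wide loads; advertising a 64-byte load without ZMM registers would feed an illegal type into the expansion. A hedged sketch of the idea (splitMemCmp is a hypothetical helper, not the TTI API; the real expansion may also use overlapping loads):

#include <vector>

// Greedily decompose an N-byte equality compare into the load sizes the
// target advertised, largest first.
std::vector<unsigned> splitMemCmp(unsigned N,
                                  const std::vector<unsigned> &LoadSizes) {
  std::vector<unsigned> Chunks;
  for (unsigned S : LoadSizes) // e.g. {64, 32, 16, 8, 4, 2, 1}
    while (N >= S) {
      Chunks.push_back(S);
      N -= S;
    }
  return Chunks; // with EVEX512, 72 -> {64, 8}; without, 72 -> {32, 32, 8}
}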
diff --git a/llvm/test/CodeGen/X86/evex512-mem.ll b/llvm/test/CodeGen/X86/evex512-mem.ll
new file mode 100644
index 000000000000000..85bb3b3a5487feb
--- /dev/null
+++ b/llvm/test/CodeGen/X86/evex512-mem.ll
@@ -0,0 +1,29 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=avx512f,avx512bw,avx512vl < %s | FileCheck %s --check-prefix=AVX512
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=avx512f,avx512bw,avx512vl,-evex512 < %s | FileCheck %s --check-prefix=AVX256
+
+define void @test1() {
+; AVX512-LABEL: test1:
+; AVX512: # %bb.0:
+; AVX512-NEXT: movq 64, %rax
+; AVX512-NEXT: movq %rax, (%rax)
+; AVX512-NEXT: vmovups 0, %zmm0
+; AVX512-NEXT: vmovups %zmm0, (%rax)
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
+;
+; AVX256-LABEL: test1:
+; AVX256: # %bb.0:
+; AVX256-NEXT: movq 64, %rax
+; AVX256-NEXT: movq %rax, (%rax)
+; AVX256-NEXT: vmovups 0, %ymm0
+; AVX256-NEXT: vmovups 32, %ymm1
+; AVX256-NEXT: vmovups %ymm1, (%rax)
+; AVX256-NEXT: vmovups %ymm0, (%rax)
+; AVX256-NEXT: vzeroupper
+; AVX256-NEXT: retq
+ call void @llvm.memcpy.p0.p0.i64(ptr align 8 poison, ptr align 8 null, i64 72, i1 false)
+ ret void
+}
+
+declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)
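The two RUN lines cover both configurations: with EVEX512 available, the 72-byte memcpy becomes one 64-byte zmm copy plus an 8-byte scalar move; with -evex512 it splits into two 32-byte ymm copies plus the scalar move. To reproduce the pre-fix crash (assuming an llc built before this commit), an invocation along these lines should trigger it:

llc -mtriple=x86_64-unknown-unknown -mattr=avx512f,avx512bw,avx512vl,-evex512 llvm/test/CodeGen/X86/evex512-mem.ll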