[llvm] 15e9a6c - [llvm][CodeGen] Do not scalarize `llvm.masked.[gather|scatter]` operating on scalable vectors.
Francesco Petrogalli via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 16 09:03:13 PDT 2020
Author: Francesco Petrogalli
Date: 2020-09-16T16:00:28Z
New Revision: 15e9a6c2118fa3db2c80043e6679da5dcc72b3a7
URL: https://github.com/llvm/llvm-project/commit/15e9a6c2118fa3db2c80043e6679da5dcc72b3a7
DIFF: https://github.com/llvm/llvm-project/commit/15e9a6c2118fa3db2c80043e6679da5dcc72b3a7.diff
LOG: [llvm][CodeGen] Do not scalarize `llvm.masked.[gather|scatter]` operating on scalable vectors.
This patch prevents the `llvm.masked.gather` and `llvm.masked.scatter` intrinsics from being scalarized when invoked on scalable vectors.
The change in `Function.cpp` is needed to prevent the warning that is raised when `getNumElements` is used in place of `getElementCount` on `VectorType` instances. The tests guard against regressions of this change.
The tests make sure that calls to `llvm.masked.[gather|scatter]` are still scalarized when:
# the intrinsics are operating on fixed size vectors, and
# the compiler is not targeting fixed length SVE code generation.
Reviewed By: efriedma, sdesmalen
Differential Revision: https://reviews.llvm.org/D86249
Added:
llvm/test/CodeGen/AArch64/llvm-masked-gather-legal-for-sve.ll
llvm/test/CodeGen/AArch64/llvm-masked-scatter-legal-for-sve.ll
Modified:
llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
llvm/lib/IR/Function.cpp
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp b/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
index 15b67e3b69cc..3443743a28c5 100644
--- a/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
+++ b/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
@@ -865,6 +865,12 @@ bool ScalarizeMaskedMemIntrin::optimizeCallInst(CallInst *CI,
bool &ModifiedDT) {
IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
if (II) {
+ // The scalarization code below does not work for scalable vectors.
+ if (isa<ScalableVectorType>(II->getType()) ||
+ any_of(II->arg_operands(),
+ [](Value *V) { return isa<ScalableVectorType>(V->getType()); }))
+ return false;
+
switch (II->getIntrinsicID()) {
default:
break;
diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp
index e701feae2256..d03ffbb8d008 100644
--- a/llvm/lib/IR/Function.cpp
+++ b/llvm/lib/IR/Function.cpp
@@ -1400,8 +1400,7 @@ static bool matchIntrinsicType(
auto *ReferenceType = dyn_cast<VectorType>(ArgTys[RefArgNumber]);
auto *ThisArgVecTy = dyn_cast<VectorType>(Ty);
if (!ThisArgVecTy || !ReferenceType ||
- (cast<FixedVectorType>(ReferenceType)->getNumElements() !=
- cast<FixedVectorType>(ThisArgVecTy)->getNumElements()))
+ (ReferenceType->getElementCount() != ThisArgVecTy->getElementCount()))
return true;
PointerType *ThisArgEltTy =
dyn_cast<PointerType>(ThisArgVecTy->getElementType());
diff --git a/llvm/test/CodeGen/AArch64/llvm-masked-gather-legal-for-sve.ll b/llvm/test/CodeGen/AArch64/llvm-masked-gather-legal-for-sve.ll
new file mode 100644
index 000000000000..1dffd76a1192
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/llvm-masked-gather-legal-for-sve.ll
@@ -0,0 +1,63 @@
+; RUN: opt -mtriple=aarch64-linux-gnu -mattr=+sve -scalarize-masked-mem-intrin -S < %s 2>%t | FileCheck %s
+; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
+
+; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
+; WARN-NOT: warning
+
+; Testing that masked gathers operating on scalable vectors that are
+; packed in SVE registers are not scalarized.
+
+; CHECK-LABEL: @masked_gather_nxv4i32(
+; CHECK: call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32
+define <vscale x 4 x i32> @masked_gather_nxv4i32(<vscale x 4 x i32*> %ld, <vscale x 4 x i1> %masks, <vscale x 4 x i32> %passthru) {
+ %res = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32(<vscale x 4 x i32*> %ld, i32 0, <vscale x 4 x i1> %masks, <vscale x 4 x i32> %passthru)
+ ret <vscale x 4 x i32> %res
+}
+
+; Testing that masked gathers operating on scalable vectors of FP data
+; that is packed in SVE registers are not scalarized.
+
+; CHECK-LABEL: @masked_gather_nxv2f64(
+; CHECK: call <vscale x 2 x double> @llvm.masked.gather.nxv2f64
+define <vscale x 2 x double> @masked_gather_nxv2f64(<vscale x 2 x double*> %ld, <vscale x 2 x i1> %masks, <vscale x 2 x double> %passthru) {
+ %res = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64(<vscale x 2 x double*> %ld, i32 0, <vscale x 2 x i1> %masks, <vscale x 2 x double> %passthru)
+ ret <vscale x 2 x double> %res
+}
+
+; Testing that masked gathers operating on scalable vectors of FP data
+; that is unpacked in SVE registers are not scalarized.
+
+; CHECK-LABEL: @masked_gather_nxv2f16(
+; CHECK: call <vscale x 2 x half> @llvm.masked.gather.nxv2f16
+define <vscale x 2 x half> @masked_gather_nxv2f16(<vscale x 2 x half*> %ld, <vscale x 2 x i1> %masks, <vscale x 2 x half> %passthru) {
+ %res = call <vscale x 2 x half> @llvm.masked.gather.nxv2f16(<vscale x 2 x half*> %ld, i32 0, <vscale x 2 x i1> %masks, <vscale x 2 x half> %passthru)
+ ret <vscale x 2 x half> %res
+}
+
+; Testing that masked gathers operating on 64-bit fixed vectors are
+; scalarized because NEON doesn't have support for masked gather
+; instructions.
+
+; CHECK-LABEL: @masked_gather_v2f32(
+; CHECK-NOT: @llvm.masked.gather.v2f32(
+define <2 x float> @masked_gather_v2f32(<2 x float*> %ld, <2 x i1> %masks, <2 x float> %passthru) {
+ %res = call <2 x float> @llvm.masked.gather.v2f32(<2 x float*> %ld, i32 0, <2 x i1> %masks, <2 x float> %passthru)
+ ret <2 x float> %res
+}
+
+; Testing that masked gathers operating on 128-bit fixed vectors are
+; scalarized because NEON doesn't have support for masked gather
+; instructions and because we are not targeting fixed width SVE.
+
+; CHECK-LABEL: @masked_gather_v4i32(
+; CHECK-NOT: @llvm.masked.gather.v4i32(
+define <4 x i32> @masked_gather_v4i32(<4 x i32*> %ld, <4 x i1> %masks, <4 x i32> %passthru) {
+ %res = call <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %ld, i32 0, <4 x i1> %masks, <4 x i32> %passthru)
+ ret <4 x i32> %res
+}
+
+declare <vscale x 4 x i32> @llvm.masked.gather.nxv4i32(<vscale x 4 x i32*> %ptrs, i32 %align, <vscale x 4 x i1> %masks, <vscale x 4 x i32> %passthru)
+declare <vscale x 2 x double> @llvm.masked.gather.nxv2f64(<vscale x 2 x double*> %ptrs, i32 %align, <vscale x 2 x i1> %masks, <vscale x 2 x double> %passthru)
+declare <vscale x 2 x half> @llvm.masked.gather.nxv2f16(<vscale x 2 x half*> %ptrs, i32 %align, <vscale x 2 x i1> %masks, <vscale x 2 x half> %passthru)
+declare <2 x float> @llvm.masked.gather.v2f32(<2 x float*> %ptrs, i32 %align, <2 x i1> %masks, <2 x float> %passthru)
+declare <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %ptrs, i32 %align, <4 x i1> %masks, <4 x i32> %passthru)
diff --git a/llvm/test/CodeGen/AArch64/llvm-masked-scatter-legal-for-sve.ll b/llvm/test/CodeGen/AArch64/llvm-masked-scatter-legal-for-sve.ll
new file mode 100644
index 000000000000..caaa146aa959
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/llvm-masked-scatter-legal-for-sve.ll
@@ -0,0 +1,63 @@
+; RUN: opt -mtriple=aarch64-linux-gnu -mattr=+sve -scalarize-masked-mem-intrin -S < %s 2>%t | FileCheck %s
+; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
+
+; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
+; WARN-NOT: warning
+
+; Testing that masked scatters operating on scalable vectors that are
+; packed in SVE registers are not scalarized.
+
+; CHECK-LABEL: @masked_scatter_nxv4i32(
+; CHECK: call void @llvm.masked.scatter.nxv4i32
+define void @masked_scatter_nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x i32*> %ptrs, <vscale x 4 x i1> %masks) {
+ call void @llvm.masked.scatter.nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x i32*> %ptrs, i32 0, <vscale x 4 x i1> %masks)
+ ret void
+}
+
+; Testing that masked scatters operating on scalable vectors of FP
+; data that is packed in SVE registers are not scalarized.
+
+; CHECK-LABEL: @masked_scatter_nxv2f64(
+; CHECK: call void @llvm.masked.scatter.nxv2f64
+define void @masked_scatter_nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x double*> %ptrs, <vscale x 2 x i1> %masks) {
+ call void @llvm.masked.scatter.nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x double*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
+ ret void
+}
+
+; Testing that masked scatters operating on scalable vectors of FP
+; data that is unpacked in SVE registers are not scalarized.
+
+; CHECK-LABEL: @masked_scatter_nxv2f16(
+; CHECK: call void @llvm.masked.scatter.nxv2f16
+define void @masked_scatter_nxv2f16(<vscale x 2 x half> %data, <vscale x 2 x half*> %ptrs, <vscale x 2 x i1> %masks) {
+ call void @llvm.masked.scatter.nxv2f16(<vscale x 2 x half> %data, <vscale x 2 x half*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
+ ret void
+}
+
+; Testing that masked scatters operating on 64-bit fixed vectors are
+; scalarized because NEON doesn't have support for masked scatter
+; instructions.
+
+; CHECK-LABEL: @masked_scatter_v2f32(
+; CHECK-NOT: @llvm.masked.scatter.v2f32(
+define void @masked_scatter_v2f32(<2 x float> %data, <2 x float*> %ptrs, <2 x i1> %masks) {
+ call void @llvm.masked.scatter.v2f32(<2 x float> %data, <2 x float*> %ptrs, i32 0, <2 x i1> %masks)
+ ret void
+}
+
+; Testing that masked scatters operating on 128-bit fixed vectors are
+; scalarized because NEON doesn't have support for masked scatter
+; instructions and because we are not targeting fixed width SVE.
+
+; CHECK-LABEL: @masked_scatter_v4i32(
+; CHECK-NOT: @llvm.masked.scatter.v4i32(
+define void @masked_scatter_v4i32(<4 x i32> %data, <4 x i32*> %ptrs, <4 x i1> %masks) {
+ call void @llvm.masked.scatter.v4i32(<4 x i32> %data, <4 x i32*> %ptrs, i32 0, <4 x i1> %masks)
+ ret void
+}
+
+declare void @llvm.masked.scatter.nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x i32*> %ptrs, i32 %align, <vscale x 4 x i1> %masks)
+declare void @llvm.masked.scatter.nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x double*> %ptrs, i32 %align, <vscale x 2 x i1> %masks)
+declare void @llvm.masked.scatter.nxv2f16(<vscale x 2 x half> %data, <vscale x 2 x half*> %ptrs, i32 %align, <vscale x 2 x i1> %masks)
+declare void @llvm.masked.scatter.v2f32(<2 x float> %data, <2 x float*> %ptrs, i32 %align, <2 x i1> %masks)
+declare void @llvm.masked.scatter.v4i32(<4 x i32> %data, <4 x i32*> %ptrs, i32 %align, <4 x i1> %masks)
More information about the llvm-commits mailing list