[llvm] b507509 - [AArch64] Allow SVE code generation for fixed-width vectors (#67122)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 23 04:41:38 PDT 2023
Author: Igor Kirillov
Date: 2023-10-23T12:41:34+01:00
New Revision: b507509f6a938f3eb74b39c381327841d9fa46b1
URL: https://github.com/llvm/llvm-project/commit/b507509f6a938f3eb74b39c381327841d9fa46b1
DIFF: https://github.com/llvm/llvm-project/commit/b507509f6a938f3eb74b39c381327841d9fa46b1.diff
LOG: [AArch64] Allow SVE code generation for fixed-width vectors (#67122)
This patch allows generation of SVE code for 128-bit fixed-width vectors, using SVE predicate masks that mimic the corresponding Neon masked operations.
Added:
llvm/test/CodeGen/AArch64/sve-fixed-length-masked-128bit-loads.ll
llvm/test/CodeGen/AArch64/sve-fixed-length-masked-128bit-stores.ll
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll
llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll
llvm/test/Transforms/ScalarizeMaskedMemIntrin/AArch64/expand-masked-load.ll
llvm/test/Transforms/ScalarizeMaskedMemIntrin/AArch64/expand-masked-store.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index a16a102e472e709..b05d6fb5232c85f 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -5619,9 +5619,7 @@ SDValue AArch64TargetLowering::LowerMLOAD(SDValue Op, SelectionDAG &DAG) const {
assert(LoadNode && "Expected custom lowering of a masked load node");
EVT VT = Op->getValueType(0);
- if (useSVEForFixedLengthVectorVT(
- VT,
- /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors()))
+ if (useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true))
return LowerFixedLengthVectorMLoadToSVE(Op, DAG);
SDValue PassThru = LoadNode->getPassThru();
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index a6baade412c77d2..0501c4228523215 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -254,7 +254,8 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
return false;
// For fixed vectors, avoid scalarization if using SVE for them.
- if (isa<FixedVectorType>(DataType) && !ST->useSVEForFixedLengthVectors())
+ if (isa<FixedVectorType>(DataType) && !ST->useSVEForFixedLengthVectors() &&
+ DataType->getPrimitiveSizeInBits() != 128)
return false; // Fall back to scalarization of masked operations.
return isElementTypeLegalForScalableVector(DataType->getScalarType());
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-128bit-loads.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-128bit-loads.ll
new file mode 100644
index 000000000000000..67a53d4e15f3bfb
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-128bit-loads.ll
@@ -0,0 +1,90 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mattr=+sve < %s | FileCheck %s
+
+
+target triple = "aarch64-unknown-linux-gnu"
+
+;
+; Masked Load
+;
+
+define <16 x i8> @masked_load_v16i8(ptr %src, <16 x i1> %mask) {
+; CHECK-LABEL: masked_load_v16i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.b, vl16
+; CHECK-NEXT: shl v0.16b, v0.16b, #7
+; CHECK-NEXT: cmlt v0.16b, v0.16b, #0
+; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0
+; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT: ret
+ %load = call <16 x i8> @llvm.masked.load.v16i8(ptr %src, i32 8, <16 x i1> %mask, <16 x i8> zeroinitializer)
+ ret <16 x i8> %load
+}
+
+define <8 x half> @masked_load_v8f16(ptr %src, <8 x i1> %mask) {
+; CHECK-LABEL: masked_load_v8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-NEXT: ptrue p0.h, vl8
+; CHECK-NEXT: shl v0.8h, v0.8h, #15
+; CHECK-NEXT: cmlt v0.8h, v0.8h, #0
+; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
+; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT: ret
+ %load = call <8 x half> @llvm.masked.load.v8f16(ptr %src, i32 8, <8 x i1> %mask, <8 x half> zeroinitializer)
+ ret <8 x half> %load
+}
+
+define <4 x float> @masked_load_v4f32(ptr %src, <4 x i1> %mask) {
+; CHECK-LABEL: masked_load_v4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-NEXT: ptrue p0.s, vl4
+; CHECK-NEXT: shl v0.4s, v0.4s, #31
+; CHECK-NEXT: cmlt v0.4s, v0.4s, #0
+; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0
+; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT: ret
+ %load = call <4 x float> @llvm.masked.load.v4f32(ptr %src, i32 8, <4 x i1> %mask, <4 x float> zeroinitializer)
+ ret <4 x float> %load
+}
+
+define <2 x double> @masked_load_v2f64(ptr %src, <2 x i1> %mask) {
+; CHECK-LABEL: masked_load_v2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: shl v0.2d, v0.2d, #63
+; CHECK-NEXT: cmlt v0.2d, v0.2d, #0
+; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
+; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT: ret
+ %load = call <2 x double> @llvm.masked.load.v2f64(ptr %src, i32 8, <2 x i1> %mask, <2 x double> zeroinitializer)
+ ret <2 x double> %load
+}
+
+define <2 x double> @masked_load_passthru_v2f64(ptr %src, <2 x i1> %mask, <2 x double> %passthru) {
+; CHECK-LABEL: masked_load_passthru_v2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT: shl v0.2d, v0.2d, #63
+; CHECK-NEXT: cmlt v0.2d, v0.2d, #0
+; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
+; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
+; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT: ret
+ %load = call <2 x double> @llvm.masked.load.v2f64(ptr %src, i32 8, <2 x i1> %mask, <2 x double> %passthru)
+ ret <2 x double> %load
+}
+
+declare <16 x i8> @llvm.masked.load.v16i8(ptr, i32, <16 x i1>, <16 x i8>)
+declare <8 x half> @llvm.masked.load.v8f16(ptr, i32, <8 x i1>, <8 x half>)
+declare <4 x float> @llvm.masked.load.v4f32(ptr, i32, <4 x i1>, <4 x float>)
+declare <2 x double> @llvm.masked.load.v2f64(ptr, i32, <2 x i1>, <2 x double>)
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-128bit-stores.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-128bit-stores.ll
new file mode 100644
index 000000000000000..bdd6ce0647016cf
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-128bit-stores.ll
@@ -0,0 +1,73 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mattr=+sve < %s | FileCheck %s
+
+
+target triple = "aarch64-unknown-linux-gnu"
+
+;
+; Masked Store
+;
+
+define void @masked_store_v16i8(ptr %dst, <16 x i1> %mask) {
+; CHECK-LABEL: masked_store_v16i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.b, vl16
+; CHECK-NEXT: shl v0.16b, v0.16b, #7
+; CHECK-NEXT: movi v1.2d, #0000000000000000
+; CHECK-NEXT: cmlt v0.16b, v0.16b, #0
+; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0
+; CHECK-NEXT: st1b { z1.b }, p0, [x0]
+; CHECK-NEXT: ret
+ call void @llvm.masked.store.v16i8(<16 x i8> zeroinitializer, ptr %dst, i32 8, <16 x i1> %mask)
+ ret void
+}
+
+define void @masked_store_v8f16(ptr %dst, <8 x i1> %mask) {
+; CHECK-LABEL: masked_store_v8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-NEXT: ptrue p0.h, vl8
+; CHECK-NEXT: movi v1.2d, #0000000000000000
+; CHECK-NEXT: shl v0.8h, v0.8h, #15
+; CHECK-NEXT: cmlt v0.8h, v0.8h, #0
+; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
+; CHECK-NEXT: st1h { z1.h }, p0, [x0]
+; CHECK-NEXT: ret
+ call void @llvm.masked.store.v8f16(<8 x half> zeroinitializer, ptr %dst, i32 8, <8 x i1> %mask)
+ ret void
+}
+
+define void @masked_store_v4f32(ptr %dst, <4 x i1> %mask) {
+; CHECK-LABEL: masked_store_v4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-NEXT: ptrue p0.s, vl4
+; CHECK-NEXT: movi v1.2d, #0000000000000000
+; CHECK-NEXT: shl v0.4s, v0.4s, #31
+; CHECK-NEXT: cmlt v0.4s, v0.4s, #0
+; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0
+; CHECK-NEXT: st1w { z1.s }, p0, [x0]
+; CHECK-NEXT: ret
+ call void @llvm.masked.store.v4f32(<4 x float> zeroinitializer, ptr %dst, i32 8, <4 x i1> %mask)
+ ret void
+}
+
+define void @masked_store_v2f64(ptr %dst, <2 x i1> %mask) {
+; CHECK-LABEL: masked_store_v2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: movi v1.2d, #0000000000000000
+; CHECK-NEXT: shl v0.2d, v0.2d, #63
+; CHECK-NEXT: cmlt v0.2d, v0.2d, #0
+; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
+; CHECK-NEXT: st1d { z1.d }, p0, [x0]
+; CHECK-NEXT: ret
+ call void @llvm.masked.store.v2f64(<2 x double> zeroinitializer, ptr %dst, i32 8, <2 x i1> %mask)
+ ret void
+}
+
+declare void @llvm.masked.store.v16i8(<16 x i8>, ptr, i32, <16 x i1>)
+declare void @llvm.masked.store.v8f16(<8 x half>, ptr, i32, <8 x i1>)
+declare void @llvm.masked.store.v4f32(<4 x float>, ptr, i32, <4 x i1>)
+declare void @llvm.masked.store.v2f64(<2 x double>, ptr, i32, <2 x i1>)
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll
index a63b90856a66d82..5dfce78af18b8e6 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll
@@ -50,7 +50,7 @@ define <2 x float> @masked_load_v2f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
ret <2 x float> %load
}
-define <4 x float> @masked_load_v4f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
+define <4 x float> @masked_load_v4f32(ptr %ap, ptr %bp) vscale_range(1,0) #0 {
; CHECK-LABEL: masked_load_v4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl4
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll
index 384b2cc6269328a..db0a5b2ba942532 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll
@@ -48,7 +48,7 @@ define void @masked_store_v2f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
ret void
}
-define void @masked_store_v4f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
+define void @masked_store_v4f32(ptr %ap, ptr %bp) vscale_range(1,0) #0 {
; CHECK-LABEL: masked_store_v4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl4
diff --git a/llvm/test/Transforms/ScalarizeMaskedMemIntrin/AArch64/expand-masked-load.ll b/llvm/test/Transforms/ScalarizeMaskedMemIntrin/AArch64/expand-masked-load.ll
index de89cce9fa9028c..fe14f2ff17f8387 100644
--- a/llvm/test/Transforms/ScalarizeMaskedMemIntrin/AArch64/expand-masked-load.ll
+++ b/llvm/test/Transforms/ScalarizeMaskedMemIntrin/AArch64/expand-masked-load.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S %s -passes=scalarize-masked-mem-intrin -mtriple=aarch64-linux-gnu | FileCheck -check-prefixes=CHECK,CHECK-LE %s
-; RUN: opt -S %s -passes=scalarize-masked-mem-intrin -mtriple=aarch64-linux-gnu -mattr=+sve | FileCheck -check-prefixes=CHECK,CHECK-LE %s
-; RUN: opt -S %s -passes=scalarize-masked-mem-intrin -mtriple=aarch64_be-linux-gnu -data-layout="E-m:o-i64:64-i128:128-n32:64-S128" | FileCheck -check-prefixes=CHECK,CHECK-BE %s
+; RUN: opt -S %s -passes=scalarize-masked-mem-intrin -mtriple=aarch64-linux-gnu | FileCheck -check-prefixes=CHECK-LE-COMMON,CHECK-LE %s
+; RUN: opt -S %s -passes=scalarize-masked-mem-intrin -mtriple=aarch64-linux-gnu -mattr=+sve | FileCheck -check-prefixes=CHECK-LE-COMMON,CHECK-LE-SVE %s
+; RUN: opt -S %s -passes=scalarize-masked-mem-intrin -mtriple=aarch64_be-linux-gnu -data-layout="E-m:o-i64:64-i128:128-n32:64-S128" | FileCheck -check-prefixes=CHECK-BE %s
define <2 x i64> @scalarize_v2i64(ptr %p, <2 x i1> %mask, <2 x i64> %passthru) {
; CHECK-LE-LABEL: @scalarize_v2i64(
@@ -28,6 +28,10 @@ define <2 x i64> @scalarize_v2i64(ptr %p, <2 x i1> %mask, <2 x i64> %passthru) {
; CHECK-LE-NEXT: [[RES_PHI_ELSE3:%.*]] = phi <2 x i64> [ [[TMP10]], [[COND_LOAD1]] ], [ [[RES_PHI_ELSE]], [[ELSE]] ]
; CHECK-LE-NEXT: ret <2 x i64> [[RES_PHI_ELSE3]]
;
+; CHECK-LE-SVE-LABEL: @scalarize_v2i64(
+; CHECK-LE-SVE-NEXT: [[RET:%.*]] = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr [[P:%.*]], i32 128, <2 x i1> [[MASK:%.*]], <2 x i64> [[PASSTHRU:%.*]])
+; CHECK-LE-SVE-NEXT: ret <2 x i64> [[RET]]
+;
; CHECK-BE-LABEL: @scalarize_v2i64(
; CHECK-BE-NEXT: [[SCALAR_MASK:%.*]] = bitcast <2 x i1> [[MASK:%.*]] to i2
; CHECK-BE-NEXT: [[TMP1:%.*]] = and i2 [[SCALAR_MASK]], -2
@@ -57,28 +61,53 @@ define <2 x i64> @scalarize_v2i64(ptr %p, <2 x i1> %mask, <2 x i64> %passthru) {
}
define <2 x i64> @scalarize_v2i64_ones_mask(ptr %p, <2 x i64> %passthru) {
-; CHECK-LABEL: @scalarize_v2i64_ones_mask(
-; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[P:%.*]], align 8
-; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+; CHECK-LE-LABEL: @scalarize_v2i64_ones_mask(
+; CHECK-LE-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[P:%.*]], align 8
+; CHECK-LE-NEXT: ret <2 x i64> [[TMP1]]
+;
+; CHECK-LE-SVE-LABEL: @scalarize_v2i64_ones_mask(
+; CHECK-LE-SVE-NEXT: [[RET:%.*]] = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr [[P:%.*]], i32 8, <2 x i1> <i1 true, i1 true>, <2 x i64> [[PASSTHRU:%.*]])
+; CHECK-LE-SVE-NEXT: ret <2 x i64> [[RET]]
+;
+; CHECK-BE-LABEL: @scalarize_v2i64_ones_mask(
+; CHECK-BE-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[P:%.*]], align 8
+; CHECK-BE-NEXT: ret <2 x i64> [[TMP1]]
;
%ret = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr %p, i32 8, <2 x i1> <i1 true, i1 true>, <2 x i64> %passthru)
ret <2 x i64> %ret
}
define <2 x i64> @scalarize_v2i64_zero_mask(ptr %p, <2 x i64> %passthru) {
-; CHECK-LABEL: @scalarize_v2i64_zero_mask(
-; CHECK-NEXT: ret <2 x i64> [[PASSTHRU:%.*]]
+; CHECK-LE-LABEL: @scalarize_v2i64_zero_mask(
+; CHECK-LE-NEXT: ret <2 x i64> [[PASSTHRU:%.*]]
+;
+; CHECK-LE-SVE-LABEL: @scalarize_v2i64_zero_mask(
+; CHECK-LE-SVE-NEXT: [[RET:%.*]] = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr [[P:%.*]], i32 8, <2 x i1> zeroinitializer, <2 x i64> [[PASSTHRU:%.*]])
+; CHECK-LE-SVE-NEXT: ret <2 x i64> [[RET]]
+;
+; CHECK-BE-LABEL: @scalarize_v2i64_zero_mask(
+; CHECK-BE-NEXT: ret <2 x i64> [[PASSTHRU:%.*]]
;
%ret = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr %p, i32 8, <2 x i1> <i1 false, i1 false>, <2 x i64> %passthru)
ret <2 x i64> %ret
}
define <2 x i64> @scalarize_v2i64_const_mask(ptr %p, <2 x i64> %passthru) {
-; CHECK-LABEL: @scalarize_v2i64_const_mask(
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i32 1
-; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8
-; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> [[PASSTHRU:%.*]], i64 [[TMP2]], i64 1
-; CHECK-NEXT: ret <2 x i64> [[TMP3]]
+; CHECK-LE-LABEL: @scalarize_v2i64_const_mask(
+; CHECK-LE-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i32 1
+; CHECK-LE-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8
+; CHECK-LE-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> [[PASSTHRU:%.*]], i64 [[TMP2]], i64 1
+; CHECK-LE-NEXT: ret <2 x i64> [[TMP3]]
+;
+; CHECK-LE-SVE-LABEL: @scalarize_v2i64_const_mask(
+; CHECK-LE-SVE-NEXT: [[RET:%.*]] = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr [[P:%.*]], i32 8, <2 x i1> <i1 false, i1 true>, <2 x i64> [[PASSTHRU:%.*]])
+; CHECK-LE-SVE-NEXT: ret <2 x i64> [[RET]]
+;
+; CHECK-BE-LABEL: @scalarize_v2i64_const_mask(
+; CHECK-BE-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i32 1
+; CHECK-BE-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8
+; CHECK-BE-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> [[PASSTHRU:%.*]], i64 [[TMP2]], i64 1
+; CHECK-BE-NEXT: ret <2 x i64> [[TMP3]]
;
%ret = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr %p, i32 8, <2 x i1> <i1 false, i1 true>, <2 x i64> %passthru)
ret <2 x i64> %ret
@@ -86,29 +115,29 @@ define <2 x i64> @scalarize_v2i64_const_mask(ptr %p, <2 x i64> %passthru) {
; This use a byte sized but non power of 2 element size. This used to crash due to bad alignment calculation.
define <2 x i24> @scalarize_v2i24(ptr %p, <2 x i1> %mask, <2 x i24> %passthru) {
-; CHECK-LE-LABEL: @scalarize_v2i24(
-; CHECK-LE-NEXT: [[SCALAR_MASK:%.*]] = bitcast <2 x i1> [[MASK:%.*]] to i2
-; CHECK-LE-NEXT: [[TMP1:%.*]] = and i2 [[SCALAR_MASK]], 1
-; CHECK-LE-NEXT: [[TMP2:%.*]] = icmp ne i2 [[TMP1]], 0
-; CHECK-LE-NEXT: br i1 [[TMP2]], label [[COND_LOAD:%.*]], label [[ELSE:%.*]]
-; CHECK-LE: cond.load:
-; CHECK-LE-NEXT: [[TMP3:%.*]] = getelementptr inbounds i24, ptr [[P:%.*]], i32 0
-; CHECK-LE-NEXT: [[TMP4:%.*]] = load i24, ptr [[TMP3]], align 1
-; CHECK-LE-NEXT: [[TMP5:%.*]] = insertelement <2 x i24> [[PASSTHRU:%.*]], i24 [[TMP4]], i64 0
-; CHECK-LE-NEXT: br label [[ELSE]]
-; CHECK-LE: else:
-; CHECK-LE-NEXT: [[RES_PHI_ELSE:%.*]] = phi <2 x i24> [ [[TMP5]], [[COND_LOAD]] ], [ [[PASSTHRU]], [[TMP0:%.*]] ]
-; CHECK-LE-NEXT: [[TMP6:%.*]] = and i2 [[SCALAR_MASK]], -2
-; CHECK-LE-NEXT: [[TMP7:%.*]] = icmp ne i2 [[TMP6]], 0
-; CHECK-LE-NEXT: br i1 [[TMP7]], label [[COND_LOAD1:%.*]], label [[ELSE2:%.*]]
-; CHECK-LE: cond.load1:
-; CHECK-LE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i24, ptr [[P]], i32 1
-; CHECK-LE-NEXT: [[TMP9:%.*]] = load i24, ptr [[TMP8]], align 1
-; CHECK-LE-NEXT: [[TMP10:%.*]] = insertelement <2 x i24> [[RES_PHI_ELSE]], i24 [[TMP9]], i64 1
-; CHECK-LE-NEXT: br label [[ELSE2]]
-; CHECK-LE: else2:
-; CHECK-LE-NEXT: [[RES_PHI_ELSE3:%.*]] = phi <2 x i24> [ [[TMP10]], [[COND_LOAD1]] ], [ [[RES_PHI_ELSE]], [[ELSE]] ]
-; CHECK-LE-NEXT: ret <2 x i24> [[RES_PHI_ELSE3]]
+; CHECK-LE-COMMON-LABEL: @scalarize_v2i24(
+; CHECK-LE-COMMON-NEXT: [[SCALAR_MASK:%.*]] = bitcast <2 x i1> [[MASK:%.*]] to i2
+; CHECK-LE-COMMON-NEXT: [[TMP1:%.*]] = and i2 [[SCALAR_MASK]], 1
+; CHECK-LE-COMMON-NEXT: [[TMP2:%.*]] = icmp ne i2 [[TMP1]], 0
+; CHECK-LE-COMMON-NEXT: br i1 [[TMP2]], label [[COND_LOAD:%.*]], label [[ELSE:%.*]]
+; CHECK-LE-COMMON: cond.load:
+; CHECK-LE-COMMON-NEXT: [[TMP3:%.*]] = getelementptr inbounds i24, ptr [[P:%.*]], i32 0
+; CHECK-LE-COMMON-NEXT: [[TMP4:%.*]] = load i24, ptr [[TMP3]], align 1
+; CHECK-LE-COMMON-NEXT: [[TMP5:%.*]] = insertelement <2 x i24> [[PASSTHRU:%.*]], i24 [[TMP4]], i64 0
+; CHECK-LE-COMMON-NEXT: br label [[ELSE]]
+; CHECK-LE-COMMON: else:
+; CHECK-LE-COMMON-NEXT: [[RES_PHI_ELSE:%.*]] = phi <2 x i24> [ [[TMP5]], [[COND_LOAD]] ], [ [[PASSTHRU]], [[TMP0:%.*]] ]
+; CHECK-LE-COMMON-NEXT: [[TMP6:%.*]] = and i2 [[SCALAR_MASK]], -2
+; CHECK-LE-COMMON-NEXT: [[TMP7:%.*]] = icmp ne i2 [[TMP6]], 0
+; CHECK-LE-COMMON-NEXT: br i1 [[TMP7]], label [[COND_LOAD1:%.*]], label [[ELSE2:%.*]]
+; CHECK-LE-COMMON: cond.load1:
+; CHECK-LE-COMMON-NEXT: [[TMP8:%.*]] = getelementptr inbounds i24, ptr [[P]], i32 1
+; CHECK-LE-COMMON-NEXT: [[TMP9:%.*]] = load i24, ptr [[TMP8]], align 1
+; CHECK-LE-COMMON-NEXT: [[TMP10:%.*]] = insertelement <2 x i24> [[RES_PHI_ELSE]], i24 [[TMP9]], i64 1
+; CHECK-LE-COMMON-NEXT: br label [[ELSE2]]
+; CHECK-LE-COMMON: else2:
+; CHECK-LE-COMMON-NEXT: [[RES_PHI_ELSE3:%.*]] = phi <2 x i24> [ [[TMP10]], [[COND_LOAD1]] ], [ [[RES_PHI_ELSE]], [[ELSE]] ]
+; CHECK-LE-COMMON-NEXT: ret <2 x i24> [[RES_PHI_ELSE3]]
;
; CHECK-BE-LABEL: @scalarize_v2i24(
; CHECK-BE-NEXT: [[SCALAR_MASK:%.*]] = bitcast <2 x i1> [[MASK:%.*]] to i2
@@ -140,29 +169,29 @@ define <2 x i24> @scalarize_v2i24(ptr %p, <2 x i1> %mask, <2 x i24> %passthru) {
; This use a byte sized but non power of 2 element size. This used to crash due to bad alignment calculation.
define <2 x i48> @scalarize_v2i48(ptr %p, <2 x i1> %mask, <2 x i48> %passthru) {
-; CHECK-LE-LABEL: @scalarize_v2i48(
-; CHECK-LE-NEXT: [[SCALAR_MASK:%.*]] = bitcast <2 x i1> [[MASK:%.*]] to i2
-; CHECK-LE-NEXT: [[TMP1:%.*]] = and i2 [[SCALAR_MASK]], 1
-; CHECK-LE-NEXT: [[TMP2:%.*]] = icmp ne i2 [[TMP1]], 0
-; CHECK-LE-NEXT: br i1 [[TMP2]], label [[COND_LOAD:%.*]], label [[ELSE:%.*]]
-; CHECK-LE: cond.load:
-; CHECK-LE-NEXT: [[TMP3:%.*]] = getelementptr inbounds i48, ptr [[P:%.*]], i32 0
-; CHECK-LE-NEXT: [[TMP4:%.*]] = load i48, ptr [[TMP3]], align 2
-; CHECK-LE-NEXT: [[TMP5:%.*]] = insertelement <2 x i48> [[PASSTHRU:%.*]], i48 [[TMP4]], i64 0
-; CHECK-LE-NEXT: br label [[ELSE]]
-; CHECK-LE: else:
-; CHECK-LE-NEXT: [[RES_PHI_ELSE:%.*]] = phi <2 x i48> [ [[TMP5]], [[COND_LOAD]] ], [ [[PASSTHRU]], [[TMP0:%.*]] ]
-; CHECK-LE-NEXT: [[TMP6:%.*]] = and i2 [[SCALAR_MASK]], -2
-; CHECK-LE-NEXT: [[TMP7:%.*]] = icmp ne i2 [[TMP6]], 0
-; CHECK-LE-NEXT: br i1 [[TMP7]], label [[COND_LOAD1:%.*]], label [[ELSE2:%.*]]
-; CHECK-LE: cond.load1:
-; CHECK-LE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i48, ptr [[P]], i32 1
-; CHECK-LE-NEXT: [[TMP9:%.*]] = load i48, ptr [[TMP8]], align 2
-; CHECK-LE-NEXT: [[TMP10:%.*]] = insertelement <2 x i48> [[RES_PHI_ELSE]], i48 [[TMP9]], i64 1
-; CHECK-LE-NEXT: br label [[ELSE2]]
-; CHECK-LE: else2:
-; CHECK-LE-NEXT: [[RES_PHI_ELSE3:%.*]] = phi <2 x i48> [ [[TMP10]], [[COND_LOAD1]] ], [ [[RES_PHI_ELSE]], [[ELSE]] ]
-; CHECK-LE-NEXT: ret <2 x i48> [[RES_PHI_ELSE3]]
+; CHECK-LE-COMMON-LABEL: @scalarize_v2i48(
+; CHECK-LE-COMMON-NEXT: [[SCALAR_MASK:%.*]] = bitcast <2 x i1> [[MASK:%.*]] to i2
+; CHECK-LE-COMMON-NEXT: [[TMP1:%.*]] = and i2 [[SCALAR_MASK]], 1
+; CHECK-LE-COMMON-NEXT: [[TMP2:%.*]] = icmp ne i2 [[TMP1]], 0
+; CHECK-LE-COMMON-NEXT: br i1 [[TMP2]], label [[COND_LOAD:%.*]], label [[ELSE:%.*]]
+; CHECK-LE-COMMON: cond.load:
+; CHECK-LE-COMMON-NEXT: [[TMP3:%.*]] = getelementptr inbounds i48, ptr [[P:%.*]], i32 0
+; CHECK-LE-COMMON-NEXT: [[TMP4:%.*]] = load i48, ptr [[TMP3]], align 2
+; CHECK-LE-COMMON-NEXT: [[TMP5:%.*]] = insertelement <2 x i48> [[PASSTHRU:%.*]], i48 [[TMP4]], i64 0
+; CHECK-LE-COMMON-NEXT: br label [[ELSE]]
+; CHECK-LE-COMMON: else:
+; CHECK-LE-COMMON-NEXT: [[RES_PHI_ELSE:%.*]] = phi <2 x i48> [ [[TMP5]], [[COND_LOAD]] ], [ [[PASSTHRU]], [[TMP0:%.*]] ]
+; CHECK-LE-COMMON-NEXT: [[TMP6:%.*]] = and i2 [[SCALAR_MASK]], -2
+; CHECK-LE-COMMON-NEXT: [[TMP7:%.*]] = icmp ne i2 [[TMP6]], 0
+; CHECK-LE-COMMON-NEXT: br i1 [[TMP7]], label [[COND_LOAD1:%.*]], label [[ELSE2:%.*]]
+; CHECK-LE-COMMON: cond.load1:
+; CHECK-LE-COMMON-NEXT: [[TMP8:%.*]] = getelementptr inbounds i48, ptr [[P]], i32 1
+; CHECK-LE-COMMON-NEXT: [[TMP9:%.*]] = load i48, ptr [[TMP8]], align 2
+; CHECK-LE-COMMON-NEXT: [[TMP10:%.*]] = insertelement <2 x i48> [[RES_PHI_ELSE]], i48 [[TMP9]], i64 1
+; CHECK-LE-COMMON-NEXT: br label [[ELSE2]]
+; CHECK-LE-COMMON: else2:
+; CHECK-LE-COMMON-NEXT: [[RES_PHI_ELSE3:%.*]] = phi <2 x i48> [ [[TMP10]], [[COND_LOAD1]] ], [ [[RES_PHI_ELSE]], [[ELSE]] ]
+; CHECK-LE-COMMON-NEXT: ret <2 x i48> [[RES_PHI_ELSE3]]
;
; CHECK-BE-LABEL: @scalarize_v2i48(
; CHECK-BE-NEXT: [[SCALAR_MASK:%.*]] = bitcast <2 x i1> [[MASK:%.*]] to i2
diff --git a/llvm/test/Transforms/ScalarizeMaskedMemIntrin/AArch64/expand-masked-store.ll b/llvm/test/Transforms/ScalarizeMaskedMemIntrin/AArch64/expand-masked-store.ll
index 56c0d6e70980315..2f73227f056707c 100644
--- a/llvm/test/Transforms/ScalarizeMaskedMemIntrin/AArch64/expand-masked-store.ll
+++ b/llvm/test/Transforms/ScalarizeMaskedMemIntrin/AArch64/expand-masked-store.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S %s -passes=scalarize-masked-mem-intrin -mtriple=aarch64-linux-gnu | FileCheck -check-prefixes=CHECK,CHECK-LE %s
-; RUN: opt -S %s -passes=scalarize-masked-mem-intrin -mtriple=aarch64-linux-gnu -mattr=+sve | FileCheck -check-prefixes=CHECK,CHECK-LE %s
-; RUN: opt -S %s -passes=scalarize-masked-mem-intrin -mtriple=aarch64_be-linux-gnu -data-layout="E-m:o-i64:64-i128:128-n32:64-S128" | FileCheck -check-prefixes=CHECK,CHECK-BE %s
+; RUN: opt -S %s -passes=scalarize-masked-mem-intrin -mtriple=aarch64-linux-gnu | FileCheck -check-prefixes=CHECK-LE %s
+; RUN: opt -S %s -passes=scalarize-masked-mem-intrin -mtriple=aarch64-linux-gnu -mattr=+sve | FileCheck -check-prefixes=CHECK-SVE-LE %s
+; RUN: opt -S %s -passes=scalarize-masked-mem-intrin -mtriple=aarch64_be-linux-gnu -data-layout="E-m:o-i64:64-i128:128-n32:64-S128" | FileCheck -check-prefixes=CHECK-BE %s
define void @scalarize_v2i64(ptr %p, <2 x i1> %mask, <2 x i64> %data) {
; CHECK-LE-LABEL: @scalarize_v2i64(
@@ -26,6 +26,10 @@ define void @scalarize_v2i64(ptr %p, <2 x i1> %mask, <2 x i64> %data) {
; CHECK-LE: else2:
; CHECK-LE-NEXT: ret void
;
+; CHECK-SVE-LE-LABEL: @scalarize_v2i64(
+; CHECK-SVE-LE-NEXT: call void @llvm.masked.store.v2i64.p0(<2 x i64> [[DATA:%.*]], ptr [[P:%.*]], i32 128, <2 x i1> [[MASK:%.*]])
+; CHECK-SVE-LE-NEXT: ret void
+;
; CHECK-BE-LABEL: @scalarize_v2i64(
; CHECK-BE-NEXT: [[SCALAR_MASK:%.*]] = bitcast <2 x i1> [[MASK:%.*]] to i2
; CHECK-BE-NEXT: [[TMP1:%.*]] = and i2 [[SCALAR_MASK]], -2
@@ -53,28 +57,53 @@ define void @scalarize_v2i64(ptr %p, <2 x i1> %mask, <2 x i64> %data) {
}
define void @scalarize_v2i64_ones_mask(ptr %p, <2 x i64> %data) {
-; CHECK-LABEL: @scalarize_v2i64_ones_mask(
-; CHECK-NEXT: store <2 x i64> [[DATA:%.*]], ptr [[P:%.*]], align 8
-; CHECK-NEXT: ret void
+; CHECK-LE-LABEL: @scalarize_v2i64_ones_mask(
+; CHECK-LE-NEXT: store <2 x i64> [[DATA:%.*]], ptr [[P:%.*]], align 8
+; CHECK-LE-NEXT: ret void
+;
+; CHECK-SVE-LE-LABEL: @scalarize_v2i64_ones_mask(
+; CHECK-SVE-LE-NEXT: call void @llvm.masked.store.v2i64.p0(<2 x i64> [[DATA:%.*]], ptr [[P:%.*]], i32 8, <2 x i1> <i1 true, i1 true>)
+; CHECK-SVE-LE-NEXT: ret void
+;
+; CHECK-BE-LABEL: @scalarize_v2i64_ones_mask(
+; CHECK-BE-NEXT: store <2 x i64> [[DATA:%.*]], ptr [[P:%.*]], align 8
+; CHECK-BE-NEXT: ret void
;
call void @llvm.masked.store.v2i64.p0(<2 x i64> %data, ptr %p, i32 8, <2 x i1> <i1 true, i1 true>)
ret void
}
define void @scalarize_v2i64_zero_mask(ptr %p, <2 x i64> %data) {
-; CHECK-LABEL: @scalarize_v2i64_zero_mask(
-; CHECK-NEXT: ret void
+; CHECK-LE-LABEL: @scalarize_v2i64_zero_mask(
+; CHECK-LE-NEXT: ret void
+;
+; CHECK-SVE-LE-LABEL: @scalarize_v2i64_zero_mask(
+; CHECK-SVE-LE-NEXT: call void @llvm.masked.store.v2i64.p0(<2 x i64> [[DATA:%.*]], ptr [[P:%.*]], i32 8, <2 x i1> zeroinitializer)
+; CHECK-SVE-LE-NEXT: ret void
+;
+; CHECK-BE-LABEL: @scalarize_v2i64_zero_mask(
+; CHECK-BE-NEXT: ret void
;
call void @llvm.masked.store.v2i64.p0(<2 x i64> %data, ptr %p, i32 8, <2 x i1> <i1 false, i1 false>)
ret void
}
define void @scalarize_v2i64_const_mask(ptr %p, <2 x i64> %data) {
-; CHECK-LABEL: @scalarize_v2i64_const_mask(
-; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i64> [[DATA:%.*]], i64 1
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i32 1
-; CHECK-NEXT: store i64 [[TMP1]], ptr [[TMP2]], align 8
-; CHECK-NEXT: ret void
+; CHECK-LE-LABEL: @scalarize_v2i64_const_mask(
+; CHECK-LE-NEXT: [[TMP1:%.*]] = extractelement <2 x i64> [[DATA:%.*]], i64 1
+; CHECK-LE-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i32 1
+; CHECK-LE-NEXT: store i64 [[TMP1]], ptr [[TMP2]], align 8
+; CHECK-LE-NEXT: ret void
+;
+; CHECK-SVE-LE-LABEL: @scalarize_v2i64_const_mask(
+; CHECK-SVE-LE-NEXT: call void @llvm.masked.store.v2i64.p0(<2 x i64> [[DATA:%.*]], ptr [[P:%.*]], i32 8, <2 x i1> <i1 false, i1 true>)
+; CHECK-SVE-LE-NEXT: ret void
+;
+; CHECK-BE-LABEL: @scalarize_v2i64_const_mask(
+; CHECK-BE-NEXT: [[TMP1:%.*]] = extractelement <2 x i64> [[DATA:%.*]], i64 1
+; CHECK-BE-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i32 1
+; CHECK-BE-NEXT: store i64 [[TMP1]], ptr [[TMP2]], align 8
+; CHECK-BE-NEXT: ret void
;
call void @llvm.masked.store.v2i64.p0(<2 x i64> %data, ptr %p, i32 8, <2 x i1> <i1 false, i1 true>)
ret void
More information about the llvm-commits
mailing list