[llvm] [ScalarizeMaskedMemIntr] Pre-commit tests for splat optimizations (PR #104527)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 15 17:21:38 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Krzysztof Drewniak (krzysz00)
<details>
<summary>Changes</summary>
Commit tests that track the current behavior when the mask argument to an llvm.masked.load or llvm.masked.store is a splat of a non-constant value (that is, nothing special is done for this case yet).
---
Full diff: https://github.com/llvm/llvm-project/pull/104527.diff
2 Files Affected:
- (modified) llvm/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-load.ll (+35)
- (modified) llvm/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-store.ll (+33)
``````````diff
diff --git a/llvm/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-load.ll b/llvm/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-load.ll
index 8c95a630ebce76..9b1c59829b9ffb 100644
--- a/llvm/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-load.ll
+++ b/llvm/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-load.ll
@@ -58,6 +58,41 @@ define <2 x i64> @scalarize_v2i64_const_mask(ptr %p, <2 x i64> %passthru) {
ret <2 x i64> %ret
}
+; To be fixed: If the mask is the splat/broadcast of a non-constant value, use a
+; vector load
+define <2 x i64> @scalarize_v2i64_splat_mask(ptr %p, i1 %mask, <2 x i64> %passthrough) {
+; CHECK-LABEL: @scalarize_v2i64_splat_mask(
+; CHECK-NEXT: [[MASK_VEC:%.*]] = insertelement <2 x i1> poison, i1 [[MASK:%.*]], i32 0
+; CHECK-NEXT: [[MASK_SPLAT:%.*]] = shufflevector <2 x i1> [[MASK_VEC]], <2 x i1> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[SCALAR_MASK:%.*]] = bitcast <2 x i1> [[MASK_SPLAT]] to i2
+; CHECK-NEXT: [[TMP1:%.*]] = and i2 [[SCALAR_MASK]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i2 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[TMP2]], label [[COND_LOAD:%.*]], label [[ELSE:%.*]]
+; CHECK: cond.load:
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i32 0
+; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> [[PASSTHROUGH:%.*]], i64 [[TMP4]], i64 0
+; CHECK-NEXT: br label [[ELSE]]
+; CHECK: else:
+; CHECK-NEXT: [[RES_PHI_ELSE:%.*]] = phi <2 x i64> [ [[TMP5]], [[COND_LOAD]] ], [ [[PASSTHROUGH]], [[TMP0:%.*]] ]
+; CHECK-NEXT: [[TMP6:%.*]] = and i2 [[SCALAR_MASK]], -2
+; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i2 [[TMP6]], 0
+; CHECK-NEXT: br i1 [[TMP7]], label [[COND_LOAD1:%.*]], label [[ELSE2:%.*]]
+; CHECK: cond.load1:
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[P]], i32 1
+; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP8]], align 8
+; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[RES_PHI_ELSE]], i64 [[TMP9]], i64 1
+; CHECK-NEXT: br label [[ELSE2]]
+; CHECK: else2:
+; CHECK-NEXT: [[RES_PHI_ELSE3:%.*]] = phi <2 x i64> [ [[TMP10]], [[COND_LOAD1]] ], [ [[RES_PHI_ELSE]], [[ELSE]] ]
+; CHECK-NEXT: ret <2 x i64> [[RES_PHI_ELSE3]]
+;
+ %mask.vec = insertelement <2 x i1> poison, i1 %mask, i32 0
+ %mask.splat = shufflevector <2 x i1> %mask.vec, <2 x i1> poison, <2 x i32> zeroinitializer
+ %ret = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr %p, i32 8, <2 x i1> %mask.splat, <2 x i64> %passthrough)
+ ret <2 x i64> %ret
+}
+
; This use a byte sized but non power of 2 element size. This used to crash due to bad alignment calculation.
define <2 x i24> @scalarize_v2i24(ptr %p, <2 x i1> %mask, <2 x i24> %passthru) {
; CHECK-LABEL: @scalarize_v2i24(
diff --git a/llvm/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-store.ll b/llvm/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-store.ll
index f6e54bc6fe94d3..cd2815e67e6720 100644
--- a/llvm/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-store.ll
+++ b/llvm/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-store.ll
@@ -56,4 +56,37 @@ define void @scalarize_v2i64_const_mask(ptr %p, <2 x i64> %data) {
ret void
}
+; To be fixed: If the mask is the splat/broadcast of a non-constant value, use a
+; vector store
+define void @scalarize_v2i64_splat_mask(ptr %p, <2 x i64> %data, i1 %mask) {
+; CHECK-LABEL: @scalarize_v2i64_splat_mask(
+; CHECK-NEXT: [[MASK_VEC:%.*]] = insertelement <2 x i1> poison, i1 [[MASK:%.*]], i32 0
+; CHECK-NEXT: [[MASK_SPLAT:%.*]] = shufflevector <2 x i1> [[MASK_VEC]], <2 x i1> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[SCALAR_MASK:%.*]] = bitcast <2 x i1> [[MASK_SPLAT]] to i2
+; CHECK-NEXT: [[TMP1:%.*]] = and i2 [[SCALAR_MASK]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i2 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[TMP2]], label [[COND_STORE:%.*]], label [[ELSE:%.*]]
+; CHECK: cond.store:
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[DATA:%.*]], i64 0
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i32 0
+; CHECK-NEXT: store i64 [[TMP3]], ptr [[TMP4]], align 8
+; CHECK-NEXT: br label [[ELSE]]
+; CHECK: else:
+; CHECK-NEXT: [[TMP5:%.*]] = and i2 [[SCALAR_MASK]], -2
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i2 [[TMP5]], 0
+; CHECK-NEXT: br i1 [[TMP6]], label [[COND_STORE1:%.*]], label [[ELSE2:%.*]]
+; CHECK: cond.store1:
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[DATA]], i64 1
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[P]], i32 1
+; CHECK-NEXT: store i64 [[TMP7]], ptr [[TMP8]], align 8
+; CHECK-NEXT: br label [[ELSE2]]
+; CHECK: else2:
+; CHECK-NEXT: ret void
+;
+ %mask.vec = insertelement <2 x i1> poison, i1 %mask, i32 0
+ %mask.splat = shufflevector <2 x i1> %mask.vec, <2 x i1> poison, <2 x i32> zeroinitializer
+ call void @llvm.masked.store.v2i64.p0(<2 x i64> %data, ptr %p, i32 8, <2 x i1> %mask.splat)
+ ret void
+}
+
declare void @llvm.masked.store.v2i64.p0(<2 x i64>, ptr, i32, <2 x i1>)
``````````
</details>
https://github.com/llvm/llvm-project/pull/104527
More information about the llvm-commits
mailing list