[llvm] 14b9505 - Add test to show missed optimization for masked load/stores

Matt Devereau via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 12 10:44:34 PDT 2022


Author: Benjamin Maxwell
Date: 2022-10-12T17:43:54Z
New Revision: 14b9505be9d10513b4ca0a22a73a158bf28d0959

URL: https://github.com/llvm/llvm-project/commit/14b9505be9d10513b4ca0a22a73a158bf28d0959
DIFF: https://github.com/llvm/llvm-project/commit/14b9505be9d10513b4ca0a22a73a158bf28d0959.diff

LOG: Add test to show missed optimization for masked load/stores

This test shows instcombine failing to remove an alloca and memcpy
for a constant array that is read with a masked load.

This will be addressed in a subsequent commit.

Added: 
    llvm/test/Transforms/InstCombine/load-store-masked-constant-array.ll

Modified: 
    

Removed: 
    


################################################################################
diff  --git a/llvm/test/Transforms/InstCombine/load-store-masked-constant-array.ll b/llvm/test/Transforms/InstCombine/load-store-masked-constant-array.ll
new file mode 100644
index 000000000000..78506f19babf
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/load-store-masked-constant-array.ll
@@ -0,0 +1,27 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -opaque-pointers -passes=instcombine < %s | FileCheck %s
+
+ at contant_int_array = private unnamed_addr constant [10 x i64] [i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9]
+
+; InstCombine should be able to optimize out the alloca and memcpy:
+define void @combine_masked_load_store_from_constant_array(ptr %ptr) {
+; CHECK-LABEL: @combine_masked_load_store_from_constant_array(
+; CHECK-NEXT:    [[TMP1:%.*]] = alloca [10 x i64], align 8
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(80) [[TMP1]], ptr noundef nonnull align 16 dereferenceable(80) @contant_int_array, i64 80, i1 false)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilelt.nxv2i1.i32(i32 0, i32 10)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr nonnull [[TMP1]], i32 8, <vscale x 2 x i1> [[TMP2]], <vscale x 2 x i64> zeroinitializer)
+; CHECK-NEXT:    call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP3]], ptr [[PTR:%.*]], i32 1, <vscale x 2 x i1> [[TMP2]])
+; CHECK-NEXT:    ret void
+;
+  %1 = alloca [10 x i64]
+  call void @llvm.memcpy.p0.p0.i64(ptr %1, ptr @contant_int_array, i64 80, i1 false)
+  %2 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilelt.nxv2i1.i32(i32 0, i32 10)
+  %3 = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr nonnull %1, i32 8, <vscale x 2 x i1> %2, <vscale x 2 x i64> zeroinitializer)
+  call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> %3, ptr %ptr, i32 1, <vscale x 2 x i1> %2)
+  ret void
+}
+
+declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)
+declare <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
+declare void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64>, ptr, i32, <vscale x 2 x i1>)
+declare <vscale x 2 x i1> @llvm.aarch64.sve.whilelt.nxv2i1.i32(i32, i32)


        


More information about the llvm-commits mailing list