[llvm] [DSE] Split memory intrinsics if they are dead in the middle (PR #75478)

Nabeel Omer via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 2 05:42:01 PDT 2024


================
@@ -0,0 +1,112 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+
+; RUN: opt < %s -passes=dse -S | FileCheck %s
+
+define void @overwrite_middle(ptr  %X) {
+; CHECK-LABEL: define void @overwrite_middle(
+; CHECK-SAME: ptr [[X:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[REAR:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 992
+; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr align 16 dereferenceable(8) [[REAR]], i8 5, i64 8, i1 false)
+; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr align 16 dereferenceable(16) [[X]], i8 5, i64 16, i1 false)
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 16
+; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr align 16 dereferenceable(980) [[ARRAYIDX]], i8 3, i64 980, i1 false)
+; CHECK-NEXT:    ret void
+;
+entry:
+  tail call void @llvm.memset.p0.i64(ptr  align 16 dereferenceable(1000) %X, i8 5, i64 1000, i1 false)
+  %arrayidx = getelementptr inbounds i8, ptr %X, i64 16
+  tail call void @llvm.memset.p0.i64(ptr  align 16 dereferenceable(980) %arrayidx, i8 3, i64 980, i1 false)
+  ret void
+}
+
+define void @overwrite_middle_smaller_alignment(ptr  %X) {
+; CHECK-LABEL: define void @overwrite_middle_smaller_alignment(
+; CHECK-SAME: ptr [[X:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[REAR:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 984
+; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr align 8 dereferenceable(16) [[REAR]], i8 5, i64 16, i1 false)
+; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr align 8 dereferenceable(8) [[X]], i8 5, i64 8, i1 false)
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 8
+; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr align 8 dereferenceable(976) [[ARRAYIDX]], i8 3, i64 976, i1 false)
+; CHECK-NEXT:    ret void
+;
+entry:
+  tail call void @llvm.memset.p0.i64(ptr  align 8 dereferenceable(1000) %X, i8 5, i64 1000, i1 false)
+  %arrayidx = getelementptr inbounds i8, ptr %X, i64 8
+  tail call void @llvm.memset.p0.i64(ptr  align 8 dereferenceable(976) %arrayidx, i8 3, i64 976, i1 false)
+  ret void
+}
+
+define void @overwrite_middle2(ptr  %X) {
+; CHECK-LABEL: define void @overwrite_middle2(
+; CHECK-SAME: ptr [[X:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 990
+; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr align 1 dereferenceable(10) [[TMP0]], i8 5, i64 10, i1 false)
+; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr align 1 dereferenceable(10) [[X]], i8 5, i64 10, i1 false)
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 10
+; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr align 1 dereferenceable(980) [[ARRAYIDX]], i8 3, i64 980, i1 false)
+; CHECK-NEXT:    ret void
+;
+entry:
+  tail call void @llvm.memset.p0.i64(ptr  align 1 dereferenceable(1000) %X, i8 5, i64 1000, i1 false)
+  %arrayidx = getelementptr inbounds i8, ptr %X, i64 10
+  tail call void @llvm.memset.p0.i64(ptr  align 1 dereferenceable(980) %arrayidx, i8 3, i64 980, i1 false)
+  ret void
+}
+
+define void @front_and_rear_bigger_than_threshold(ptr  %X) {
+; CHECK-LABEL: define void @front_and_rear_bigger_than_threshold(
+; CHECK-SAME: ptr [[X:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr align 1 dereferenceable(1000) [[X]], i8 5, i64 1000, i1 false)
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 65
+; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr align 1 dereferenceable(800) [[ARRAYIDX]], i8 3, i64 800, i1 false)
+; CHECK-NEXT:    ret void
+;
+entry:
+  tail call void @llvm.memset.p0.i64(ptr  align 1 dereferenceable(1000) %X, i8 5, i64 1000, i1 false)
+  %arrayidx = getelementptr inbounds i8, ptr %X, i64 65
+  tail call void @llvm.memset.p0.i64(ptr  align 1 dereferenceable(800) %arrayidx, i8 3, i64 800, i1 false)
+  ret void
+}
+
+define void @dead_smaller_than_threshold(ptr  %X) {
+; CHECK-LABEL: define void @dead_smaller_than_threshold(
+; CHECK-SAME: ptr [[X:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 35
+; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr align 1 dereferenceable(15) [[TMP0]], i8 5, i64 15, i1 false)
+; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr align 1 dereferenceable(10) [[X]], i8 5, i64 10, i1 false)
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 10
+; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr align 1 dereferenceable(45) [[ARRAYIDX]], i8 3, i64 25, i1 false)
+; CHECK-NEXT:    ret void
+;
+entry:
+  tail call void @llvm.memset.p0.i64(ptr  align 1 dereferenceable(50) %X, i8 5, i64 50, i1 false)
+  %arrayidx = getelementptr inbounds i8, ptr %X, i64 10
+  tail call void @llvm.memset.p0.i64(ptr  align 1 dereferenceable(45) %arrayidx, i8 3, i64 25, i1 false)
+  ret void
+}
+
+define void @dontwrite28to32memset_atomic(ptr nocapture %X) {
+; CHECK-LABEL: define void @dontwrite28to32memset_atomic(
+; CHECK-SAME: ptr nocapture [[X:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[REAR:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 992
+; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 16 dereferenceable(8) [[REAR]], i8 5, i64 8, i32 4)
+; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 16 dereferenceable(16) [[X]], i8 5, i64 16, i32 4)
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 16
+; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 16 [[ARRAYIDX]], i8 3, i64 978, i32 4)
+; CHECK-NEXT:    ret void
+;
+entry:
+  call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 16 %X, i8 5, i64 1000, i32 4)
----------------
omern1 wrote:

Yeah, the only requirement appears to be that the sizes of the front and rear should each be a multiple of the element size.

https://github.com/llvm/llvm-project/pull/75478


More information about the llvm-commits mailing list