[llvm] [DSE] Split memory intrinsics if they are dead in the middle (PR #75478)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 9 18:39:39 PDT 2024
================
@@ -0,0 +1,112 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+
+; RUN: opt < %s -passes=dse -S | FileCheck %s
+
+define void @overwrite_middle(ptr %X) {
+; CHECK-LABEL: define void @overwrite_middle(
+; CHECK-SAME: ptr [[X:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[REAR:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 992
+; CHECK-NEXT: tail call void @llvm.memset.p0.i64(ptr align 16 dereferenceable(8) [[REAR]], i8 5, i64 8, i1 false)
+; CHECK-NEXT: tail call void @llvm.memset.p0.i64(ptr align 16 dereferenceable(16) [[X]], i8 5, i64 16, i1 false)
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 16
+; CHECK-NEXT: tail call void @llvm.memset.p0.i64(ptr align 16 dereferenceable(980) [[ARRAYIDX]], i8 3, i64 980, i1 false)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void @llvm.memset.p0.i64(ptr align 16 dereferenceable(1000) %X, i8 5, i64 1000, i1 false)
+ %arrayidx = getelementptr inbounds i8, ptr %X, i64 16
+ tail call void @llvm.memset.p0.i64(ptr align 16 dereferenceable(980) %arrayidx, i8 3, i64 980, i1 false)
+ ret void
+}
+
+define void @overwrite_middle_mismatched_alignment(ptr %X) {
+; CHECK-LABEL: define void @overwrite_middle_mismatched_alignment(
+; CHECK-SAME: ptr [[X:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[REAR:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 992
+; CHECK-NEXT: tail call void @llvm.memset.p0.i64(ptr align 16 dereferenceable(8) [[REAR]], i8 5, i64 8, i1 false)
+; CHECK-NEXT: tail call void @llvm.memset.p0.i64(ptr align 16 dereferenceable(16) [[X]], i8 5, i64 16, i1 false)
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 16
+; CHECK-NEXT: tail call void @llvm.memset.p0.i64(ptr align 1 dereferenceable(980) [[ARRAYIDX]], i8 3, i64 980, i1 false)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void @llvm.memset.p0.i64(ptr align 16 dereferenceable(1000) %X, i8 5, i64 1000, i1 false)
+ %arrayidx = getelementptr inbounds i8, ptr %X, i64 16
+ tail call void @llvm.memset.p0.i64(ptr align 1 dereferenceable(980) %arrayidx, i8 3, i64 980, i1 false)
+ ret void
+}
+
+define void @overwrite_middle2(ptr %X) {
+; CHECK-LABEL: define void @overwrite_middle2(
+; CHECK-SAME: ptr [[X:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 990
+; CHECK-NEXT: tail call void @llvm.memset.p0.i64(ptr align 1 dereferenceable(10) [[TMP0]], i8 5, i64 10, i1 false)
+; CHECK-NEXT: tail call void @llvm.memset.p0.i64(ptr align 1 dereferenceable(10) [[X]], i8 5, i64 10, i1 false)
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 10
+; CHECK-NEXT: tail call void @llvm.memset.p0.i64(ptr align 1 dereferenceable(980) [[ARRAYIDX]], i8 3, i64 980, i1 false)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void @llvm.memset.p0.i64(ptr align 1 dereferenceable(1000) %X, i8 5, i64 1000, i1 false)
+ %arrayidx = getelementptr inbounds i8, ptr %X, i64 10
+ tail call void @llvm.memset.p0.i64(ptr align 1 dereferenceable(980) %arrayidx, i8 3, i64 980, i1 false)
+ ret void
+}
+
+define void @front_and_rear_bigger_than_threshold(ptr %X) {
+; CHECK-LABEL: define void @front_and_rear_bigger_than_threshold(
+; CHECK-SAME: ptr [[X:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: tail call void @llvm.memset.p0.i64(ptr align 1 dereferenceable(1000) [[X]], i8 5, i64 1000, i1 false)
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 65
+; CHECK-NEXT: tail call void @llvm.memset.p0.i64(ptr align 1 dereferenceable(800) [[ARRAYIDX]], i8 3, i64 800, i1 false)
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void @llvm.memset.p0.i64(ptr align 1 dereferenceable(1000) %X, i8 5, i64 1000, i1 false)
+ %arrayidx = getelementptr inbounds i8, ptr %X, i64 65
+ tail call void @llvm.memset.p0.i64(ptr align 1 dereferenceable(800) %arrayidx, i8 3, i64 800, i1 false)
+ ret void
+}
+
+define void @dead_smaller_than_threshold(ptr %X) {
+; CHECK-LABEL: define void @dead_smaller_than_threshold(
+; CHECK-SAME: ptr [[X:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 35
+; CHECK-NEXT: tail call void @llvm.memset.p0.i64(ptr align 1 dereferenceable(15) [[TMP0]], i8 5, i64 15, i1 false)
+; CHECK-NEXT: tail call void @llvm.memset.p0.i64(ptr align 1 dereferenceable(10) [[X]], i8 5, i64 10, i1 false)
----------------
DianQK wrote:
Could you swap the two memset intrinsics? That would probably make the CPU cache happier.
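(For illustration only, not from the patch: the suggestion reads as emitting the front slice before the rear slice, so the two stores walk %X in ascending address order. A rough sketch of the reordered output, assuming the original memset covered bytes [0, 50) of %X as the visible CHECK lines imply; the %rear name is hypothetical, the actual output uses an autogenerated temporary.)

    ; front slice [0, 10) first ...
    tail call void @llvm.memset.p0.i64(ptr align 1 dereferenceable(10) %X, i8 5, i64 10, i1 false)
    ; ... then the rear slice [35, 50), so writes proceed in ascending address order
    %rear = getelementptr inbounds i8, ptr %X, i64 35
    tail call void @llvm.memset.p0.i64(ptr align 1 dereferenceable(15) %rear, i8 5, i64 15, i1 false)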
https://github.com/llvm/llvm-project/pull/75478