[clang] [llvm] [LV] Mask off possibly aliasing vector lanes (PR #100579)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 5 06:03:45 PDT 2024
================
@@ -0,0 +1,369 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -passes=loop-vectorize -mtriple=aarch64-linux-gnu -force-vector-interleave=1 -prefer-predicate-over-epilogue=predicate-dont-vectorize -S | FileCheck %s
+define dso_local void @whilewr_8(ptr noalias nocapture noundef readonly %a, ptr nocapture noundef readonly %b, ptr nocapture noundef writeonly %c, i32 noundef %n) local_unnamed_addr #0 {
+; CHECK-LABEL: define dso_local void @whilewr_8(
+; CHECK-SAME: ptr noalias nocapture noundef readonly [[A:%.*]], ptr nocapture noundef readonly [[B:%.*]], ptr nocapture noundef writeonly [[C:%.*]], i32 noundef [[N:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[N]], 0
+; CHECK-NEXT: br i1 [[CMP11]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
+; CHECK: for.body.preheader:
+; CHECK-NEXT: [[C14:%.*]] = ptrtoint ptr [[C]] to i64
+; CHECK-NEXT: [[B15:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[N]] to i64
+; CHECK-NEXT: [[SUB_DIFF:%.*]] = sub i64 [[B15]], [[C14]]
+; CHECK-NEXT: [[NEG_COMPARE:%.*]] = icmp slt i64 [[SUB_DIFF]], 0
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i1> poison, i1 [[NEG_COMPARE]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i1> [[DOTSPLATINSERT]], <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+; CHECK-NEXT: [[PTR_DIFF_LANE_MASK:%.*]] = tail call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 [[SUB_DIFF]])
+; CHECK-NEXT: [[ACTIVE_LANE_MASK_ALIAS:%.*]] = or <vscale x 16 x i1> [[PTR_DIFF_LANE_MASK]], [[DOTSPLAT]]
+; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = tail call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 [[WIDE_TRIP_COUNT]])
+; CHECK-NEXT: [[TMP0:%.*]] = zext <vscale x 16 x i1> [[ACTIVE_LANE_MASK_ALIAS]] to <vscale x 16 x i8>
+; CHECK-NEXT: [[TMP1:%.*]] = tail call i8 @llvm.vector.reduce.add.nxv16i8(<vscale x 16 x i8> [[TMP0]])
+; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[TMP1]] to i64
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 16 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[FOR_BODY_PREHEADER]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP3:%.*]] = and <vscale x 16 x i1> [[ACTIVE_LANE_MASK]], [[ACTIVE_LANE_MASK_ALIAS]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP4]], i32 1, <vscale x 16 x i1> [[TMP3]], <vscale x 16 x i8> poison)
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_MASKED_LOAD16:%.*]] = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP5]], i32 1, <vscale x 16 x i1> [[TMP3]], <vscale x 16 x i8> poison)
+; CHECK-NEXT: [[TMP6:%.*]] = add <vscale x 16 x i8> [[WIDE_MASKED_LOAD16]], [[WIDE_MASKED_LOAD]]
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 [[INDEX]]
+; CHECK-NEXT: tail call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP6]], ptr [[TMP7]], i32 1, <vscale x 16 x i1> [[TMP3]])
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP2]]
+; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = tail call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX_NEXT]], i64 [[WIDE_TRIP_COUNT]])
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <vscale x 16 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0
+; CHECK-NEXT: br i1 [[TMP8]], label [[VECTOR_BODY]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]]
+; CHECK: for.cond.cleanup.loopexit:
+; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
+; CHECK: for.cond.cleanup:
+; CHECK-NEXT: ret void
+;
+entry:
+ %cmp11 = icmp sgt i32 %n, 0
+ br i1 %cmp11, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:
----------------
fhahn wrote:
I might be missing something but the input is already vectorized?
https://github.com/llvm/llvm-project/pull/100579
More information about the llvm-commits
mailing list