[llvm] [LoopVectorize] Add support for vectorisation of simple early exit loops (PR #88385)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Thu May 9 00:49:57 PDT 2024
================
@@ -0,0 +1,2660 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S < %s -p loop-vectorize -enable-early-exit-vectorization -mattr=+sve \
+; RUN: -mtriple aarch64-linux-gnu | FileCheck %s --check-prefixes=CHECK,MAY_FAULT
+; RUN: opt -S < %s -p loop-vectorize -enable-early-exit-vectorization -vectorizer-no-mem-fault \
+; RUN: -mattr=+sve -mtriple aarch64-linux-gnu | FileCheck %s --check-prefixes=CHECK,NO_FAULT
+
+
+define i64 @same_exit_block_pre_inc_use1() {
+; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 4
+; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 16
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 40, [[TMP3]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 16
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 40, [[TMP5]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 40, [[N_MOD_VF]]
+; CHECK-NEXT: [[IND_END:%.*]] = add i64 3, [[N_VEC]]
+; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 16
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT4:%.*]], [[FOR_INC3:%.*]] ]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]]
+; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 16 x i64> @llvm.experimental.stepvector.nxv16i64()
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i64> poison, i64 [[OFFSET_IDX]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i64> [[DOTSPLATINSERT]], <vscale x 16 x i64> poison, <vscale x 16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP9:%.*]] = add <vscale x 16 x i64> zeroinitializer, [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = mul <vscale x 16 x i64> [[TMP9]], shufflevector (<vscale x 16 x i64> insertelement (<vscale x 16 x i64> poison, i64 1, i64 0), <vscale x 16 x i64> poison, <vscale x 16 x i32> zeroinitializer)
+; CHECK-NEXT: [[TMP11:%.*]] = add <vscale x 16 x i64> [[DOTSPLAT]], [[TMP10]]
+; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[OFFSET_IDX]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 2
+; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 3
+; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[OFFSET_IDX]], 4
+; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[OFFSET_IDX]], 5
+; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[OFFSET_IDX]], 6
+; CHECK-NEXT: [[TMP19:%.*]] = add i64 [[OFFSET_IDX]], 7
+; CHECK-NEXT: [[TMP20:%.*]] = add i64 [[OFFSET_IDX]], 8
+; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[OFFSET_IDX]], 9
+; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[OFFSET_IDX]], 10
+; CHECK-NEXT: [[TMP23:%.*]] = add i64 [[OFFSET_IDX]], 11
+; CHECK-NEXT: [[TMP24:%.*]] = add i64 [[OFFSET_IDX]], 12
+; CHECK-NEXT: [[TMP25:%.*]] = add i64 [[OFFSET_IDX]], 13
+; CHECK-NEXT: [[TMP26:%.*]] = add i64 [[OFFSET_IDX]], 14
+; CHECK-NEXT: [[TMP27:%.*]] = add i64 [[OFFSET_IDX]], 15
+; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP12]]
+; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i8, ptr [[TMP28]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[TMP29]], align 1
+; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[TMP12]]
+; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i8, ptr [[TMP30]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 16 x i8>, ptr [[TMP31]], align 1
+; CHECK-NEXT: [[TMP32:%.*]] = icmp eq <vscale x 16 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]]
+; CHECK-NEXT: [[TMP33:%.*]] = xor <vscale x 16 x i1> [[TMP32]], shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer)
+; CHECK-NEXT: [[TMP34:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> [[TMP33]])
+; CHECK-NEXT: br i1 [[TMP34]], label [[VECTOR_EARLY_EXIT:%.*]], label [[FOR_INC3]]
+; CHECK: loop.inc3:
+; CHECK-NEXT: [[INDEX_NEXT4]] = add nuw i64 [[INDEX1]], [[TMP7]]
+; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i64 [[INDEX_NEXT4]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP35]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: vector.early.exit:
+; CHECK-NEXT: [[TMP36:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> [[TMP33]], i1 true)
+; CHECK-NEXT: [[TMP37:%.*]] = add i64 [[TMP36]], [[INDEX1]]
+; CHECK-NEXT: [[IND_EARLY_ESCAPE:%.*]] = add i64 3, [[TMP37]]
+; CHECK-NEXT: br label [[FOR_END_LOOPEXIT:%.*]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 40, [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: br label [[LAND_RHS:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP38:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP39:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[TMP38]], [[TMP39]]
+; CHECK-NEXT: br i1 [[CMP3]], label [[FOR_INC]], label [[FOR_END_LOOPEXIT]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 43
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LAND_RHS]], label [[FOR_END_LOOPEXIT]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[START_0_LCSSA:%.*]] = phi i64 [ [[INDEX]], [[LAND_RHS]] ], [ 43, [[FOR_INC]] ], [ [[IND_EARLY_ESCAPE]], [[VECTOR_EARLY_EXIT]] ], [ 43, [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: ret i64 [[START_0_LCSSA]]
+;
+entry:
+ %p1 = alloca [1024 x i8]
+ %p2 = alloca [1024 x i8]
----------------
fhahn wrote:
At the moment, the tests read from uninitialized memory, which means automatic verification won't work. It would probably also make them more robust to avoid reading from uninitialized memory (e.g. by passing the pointers to a call first)
https://github.com/llvm/llvm-project/pull/88385
More information about the llvm-commits
mailing list