[llvm] [AArch64] Add MATCH loops to LoopIdiomVectorizePass (PR #101976)

David Sherwood via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 30 08:03:55 PST 2025


================
@@ -0,0 +1,445 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -mtriple=aarch64 -mattr=+sve -passes='loop(loop-idiom-vectorize)' -verify-loop-info -verify-dom-info -S < %s | FileCheck %s
+; RUN: opt -mtriple=aarch64 -mattr=+sve -passes='loop(loop-idiom-vectorize)' -disable-loop-idiom-vectorize-find-first-byte -S < %s | FileCheck -check-prefix=DISABLE %s
+
+; Base case based on `libcxx/include/__algorithm/find_first_of.h`:
+;   char* find_first_of(char *first, char *last, char *s_first, char *s_last) {
+;     for (; first != last; ++first)
+;       for (char *it = s_first; it != s_last; ++it)
+;         if (*first == *it)
+;           return first;
+;     return last;
+;   }
+define ptr @find_first_of_i8(ptr %0, ptr %1, ptr %2, ptr %3) #0 {
+; CHECK-LABEL: define ptr @find_first_of_i8(
+; CHECK-SAME: ptr [[TMP0:%.*]], ptr [[TMP1:%.*]], ptr [[TMP2:%.*]], ptr [[TMP3:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq ptr [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq ptr [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[TMP7]], label %[[BB60:.*]], [[DOTPREHEADER:label %.*]]
+; CHECK:       [[_PREHEADER:.*:]]
+; CHECK-NEXT:    br label %[[BB8:.*]]
+; CHECK:       [[BB8]]:
+; CHECK-NEXT:    [[TMP9:%.*]] = ptrtoint ptr [[TMP0]] to i64
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP1]] to i64
+; CHECK-NEXT:    [[TMP11:%.*]] = ptrtoint ptr [[TMP2]] to i64
+; CHECK-NEXT:    [[TMP12:%.*]] = ptrtoint ptr [[TMP3]] to i64
+; CHECK-NEXT:    [[TMP13:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 16)
+; CHECK-NEXT:    [[TMP14:%.*]] = lshr i64 [[TMP9]], 12
+; CHECK-NEXT:    [[TMP15:%.*]] = lshr i64 [[TMP10]], 12
+; CHECK-NEXT:    [[TMP16:%.*]] = lshr i64 [[TMP11]], 12
+; CHECK-NEXT:    [[TMP17:%.*]] = lshr i64 [[TMP12]], 12
+; CHECK-NEXT:    [[TMP18:%.*]] = icmp ne i64 [[TMP14]], [[TMP15]]
+; CHECK-NEXT:    [[TMP19:%.*]] = icmp ne i64 [[TMP16]], [[TMP17]]
+; CHECK-NEXT:    [[TMP20:%.*]] = or i1 [[TMP18]], [[TMP19]]
+; CHECK-NEXT:    br i1 [[TMP20]], label %[[SCALAR_PH:.*]], label %[[BB21:.*]], !prof [[PROF0:![0-9]+]]
+; CHECK:       [[BB21]]:
+; CHECK-NEXT:    [[PSEARCH:%.*]] = phi ptr [ [[TMP0]], %[[BB8]] ], [ [[TMP45:%.*]], %[[TMP44:.*]] ]
+; CHECK-NEXT:    [[TMP22:%.*]] = ptrtoint ptr [[PSEARCH]] to i64
+; CHECK-NEXT:    [[TMP23:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP22]], i64 [[TMP10]])
+; CHECK-NEXT:    [[TMP24:%.*]] = and <vscale x 16 x i1> [[TMP13]], [[TMP23]]
+; CHECK-NEXT:    [[TMP25:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[PSEARCH]], i32 1, <vscale x 16 x i1> [[TMP24]], <vscale x 16 x i8> zeroinitializer)
+; CHECK-NEXT:    br label %[[BB26:.*]]
+; CHECK:       [[BB26]]:
+; CHECK-NEXT:    [[PNEEDLE:%.*]] = phi ptr [ [[TMP2]], %[[BB21]] ], [ [[TMP42:%.*]], %[[TMP41:.*]] ]
+; CHECK-NEXT:    [[TMP27:%.*]] = ptrtoint ptr [[PNEEDLE]] to i64
+; CHECK-NEXT:    [[TMP28:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP27]], i64 [[TMP12]])
+; CHECK-NEXT:    [[TMP29:%.*]] = and <vscale x 16 x i1> [[TMP13]], [[TMP28]]
+; CHECK-NEXT:    [[TMP30:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[PNEEDLE]], i32 1, <vscale x 16 x i1> [[TMP29]], <vscale x 16 x i8> zeroinitializer)
+; CHECK-NEXT:    [[TMP31:%.*]] = extractelement <vscale x 16 x i8> [[TMP30]], i64 0
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[TMP31]], i64 0
+; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[DOTSPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP32:%.*]] = select <vscale x 16 x i1> [[TMP29]], <vscale x 16 x i8> [[TMP30]], <vscale x 16 x i8> [[DOTSPLAT]]
+; CHECK-NEXT:    [[TMP33:%.*]] = call <16 x i8> @llvm.vector.extract.v16i8.nxv16i8(<vscale x 16 x i8> [[TMP32]], i64 0)
+; CHECK-NEXT:    [[TMP34:%.*]] = call <vscale x 16 x i1> @llvm.experimental.vector.match.nxv16i8.v16i8(<vscale x 16 x i8> [[TMP25]], <16 x i8> [[TMP33]], <vscale x 16 x i1> [[TMP24]])
+; CHECK-NEXT:    [[TMP35:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> [[TMP34]])
+; CHECK-NEXT:    br i1 [[TMP35]], label %[[BB36:.*]], label %[[TMP41]]
+; CHECK:       [[BB36]]:
+; CHECK-NEXT:    [[TMP37:%.*]] = phi ptr [ [[PSEARCH]], %[[BB26]] ]
+; CHECK-NEXT:    [[TMP38:%.*]] = phi <vscale x 16 x i1> [ [[TMP34]], %[[BB26]] ]
+; CHECK-NEXT:    [[TMP39:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> [[TMP38]], i1 true)
+; CHECK-NEXT:    [[TMP40:%.*]] = getelementptr i8, ptr [[TMP37]], i64 [[TMP39]]
+; CHECK-NEXT:    br label %[[DOTLOOPEXIT:.*]]
+; CHECK:       [[TMP41]]:
+; CHECK-NEXT:    [[TMP42]] = getelementptr i8, ptr [[PNEEDLE]], i64 16
+; CHECK-NEXT:    [[TMP43:%.*]] = icmp ult ptr [[TMP42]], [[TMP3]]
+; CHECK-NEXT:    br i1 [[TMP43]], label %[[BB26]], label %[[TMP44]]
+; CHECK:       [[TMP44]]:
+; CHECK-NEXT:    [[TMP45]] = getelementptr i8, ptr [[PSEARCH]], i64 16
+; CHECK-NEXT:    [[TMP46:%.*]] = icmp ult ptr [[TMP45]], [[TMP1]]
+; CHECK-NEXT:    br i1 [[TMP46]], label %[[BB21]], label %[[DOTLOOPEXIT1:.*]]
+; CHECK:       [[SCALAR_PH]]:
+; CHECK-NEXT:    br label %[[BB47:.*]]
+; CHECK:       [[BB47]]:
+; CHECK-NEXT:    [[TMP48:%.*]] = phi ptr [ [[TMP58:%.*]], %[[TMP57:.*]] ], [ [[TMP0]], %[[SCALAR_PH]] ]
+; CHECK-NEXT:    [[TMP49:%.*]] = load i8, ptr [[TMP48]], align 1
+; CHECK-NEXT:    br label %[[BB53:.*]]
+; CHECK:       [[BB50:.*]]:
+; CHECK-NEXT:    [[TMP51:%.*]] = getelementptr inbounds i8, ptr [[TMP54:%.*]], i64 1
+; CHECK-NEXT:    [[TMP52:%.*]] = icmp eq ptr [[TMP51]], [[TMP3]]
+; CHECK-NEXT:    br i1 [[TMP52]], label %[[TMP57]], label %[[BB53]]
+; CHECK:       [[BB53]]:
+; CHECK-NEXT:    [[TMP54]] = phi ptr [ [[TMP2]], %[[BB47]] ], [ [[TMP51]], %[[BB50]] ]
+; CHECK-NEXT:    [[TMP55:%.*]] = load i8, ptr [[TMP54]], align 1
+; CHECK-NEXT:    [[TMP56:%.*]] = icmp eq i8 [[TMP49]], [[TMP55]]
+; CHECK-NEXT:    br i1 [[TMP56]], label %[[DOTLOOPEXIT]], label %[[BB50]]
+; CHECK:       [[TMP57]]:
+; CHECK-NEXT:    [[TMP58]] = getelementptr inbounds i8, ptr [[TMP48]], i64 1
+; CHECK-NEXT:    [[TMP59:%.*]] = icmp eq ptr [[TMP58]], [[TMP1]]
+; CHECK-NEXT:    br i1 [[TMP59]], label %[[DOTLOOPEXIT1]], label %[[BB47]]
+; CHECK:       [[_LOOPEXIT:.*:]]
+; CHECK-NEXT:    [[DOTLCSSA:%.*]] = phi ptr [ [[TMP48]], %[[BB53]] ], [ [[TMP40]], %[[BB36]] ]
+; CHECK-NEXT:    br label %[[BB60]]
+; CHECK:       [[_LOOPEXIT1:.*:]]
+; CHECK-NEXT:    br label %[[BB60]]
+; CHECK:       [[BB60]]:
+; CHECK-NEXT:    [[TMP61:%.*]] = phi ptr [ [[TMP1]], [[TMP4:%.*]] ], [ [[DOTLCSSA]], %[[DOTLOOPEXIT]] ], [ [[TMP1]], %[[DOTLOOPEXIT1]] ]
+; CHECK-NEXT:    ret ptr [[TMP61]]
+;
+; DISABLE-LABEL: define ptr @find_first_of_i8(
+; DISABLE-SAME: ptr [[TMP0:%.*]], ptr [[TMP1:%.*]], ptr [[TMP2:%.*]], ptr [[TMP3:%.*]]) #[[ATTR0:[0-9]+]] {
+; DISABLE-NEXT:    [[TMP5:%.*]] = icmp eq ptr [[TMP0]], [[TMP1]]
+; DISABLE-NEXT:    [[TMP6:%.*]] = icmp eq ptr [[TMP2]], [[TMP3]]
+; DISABLE-NEXT:    [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]]
+; DISABLE-NEXT:    br i1 [[TMP7]], label %[[BB21:.*]], label %[[DOTPREHEADER:.*]]
+; DISABLE:       [[_PREHEADER:.*:]]
+; DISABLE-NEXT:    br label %[[BB8:.*]]
+; DISABLE:       [[BB8]]:
+; DISABLE-NEXT:    [[TMP9:%.*]] = phi ptr [ [[TMP19:%.*]], %[[TMP18:.*]] ], [ [[TMP0]], %[[DOTPREHEADER]] ]
+; DISABLE-NEXT:    [[TMP10:%.*]] = load i8, ptr [[TMP9]], align 1
+; DISABLE-NEXT:    br label %[[BB14:.*]]
+; DISABLE:       [[BB11:.*]]:
+; DISABLE-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP15:%.*]], i64 1
+; DISABLE-NEXT:    [[TMP13:%.*]] = icmp eq ptr [[TMP12]], [[TMP3]]
+; DISABLE-NEXT:    br i1 [[TMP13]], label %[[TMP18]], label %[[BB14]]
+; DISABLE:       [[BB14]]:
+; DISABLE-NEXT:    [[TMP15]] = phi ptr [ [[TMP2]], %[[BB8]] ], [ [[TMP12]], %[[BB11]] ]
+; DISABLE-NEXT:    [[TMP16:%.*]] = load i8, ptr [[TMP15]], align 1
+; DISABLE-NEXT:    [[TMP17:%.*]] = icmp eq i8 [[TMP10]], [[TMP16]]
+; DISABLE-NEXT:    br i1 [[TMP17]], label %[[DOTLOOPEXIT:.*]], label %[[BB11]]
+; DISABLE:       [[TMP18]]:
+; DISABLE-NEXT:    [[TMP19]] = getelementptr inbounds i8, ptr [[TMP9]], i64 1
+; DISABLE-NEXT:    [[TMP20:%.*]] = icmp eq ptr [[TMP19]], [[TMP1]]
+; DISABLE-NEXT:    br i1 [[TMP20]], label %[[DOTLOOPEXIT1:.*]], label %[[BB8]]
+; DISABLE:       [[_LOOPEXIT:.*:]]
+; DISABLE-NEXT:    [[DOTLCSSA:%.*]] = phi ptr [ [[TMP9]], %[[BB14]] ]
+; DISABLE-NEXT:    br label %[[BB21]]
+; DISABLE:       [[_LOOPEXIT1:.*:]]
+; DISABLE-NEXT:    br label %[[BB21]]
+; DISABLE:       [[BB21]]:
+; DISABLE-NEXT:    [[TMP22:%.*]] = phi ptr [ [[TMP1]], [[TMP4:%.*]] ], [ [[DOTLCSSA]], %[[DOTLOOPEXIT]] ], [ [[TMP1]], %[[DOTLOOPEXIT1]] ]
+; DISABLE-NEXT:    ret ptr [[TMP22]]
+;
+  %5 = icmp eq ptr %0, %1
----------------
david-arm wrote:

Can you tidy up the tests a little to give the blocks and some of the critical variables more readable names? See Transforms/LoopIdiom/AArch64/byte-compare-index.ll for an example.

https://github.com/llvm/llvm-project/pull/101976


More information about the llvm-commits mailing list