[llvm] [AArch64][LoopVectorize] Use either fixed-width or scalable VF when tail-folding (PR #67543)

Mon Oct 2 06:10:08 PDT 2023

================
@@ -0,0 +1,31 @@
+; RUN: opt -S < %s -passes=loop-vectorize -mtriple aarch64-linux-gnu -mattr=+sve 2>&1 | FileCheck %s
+
+define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val){
+; CHECK-LABEL: define void @clamped_tc_8
+; CHECK: call void @llvm.masked.store.nxv8i8.p0(<vscale x 8 x i8> %19, ptr %20, i32 1, <vscale x 8 x i1> %active.lane.mask)
+entry:
+  %rem = and i32 %n, 63
+  %cmp8.not = icmp eq i32 %rem, 0
+  br i1 %cmp8.not, label %for.cond.cleanup, label %for.body.preheader
+
+for.body.preheader:                               ; preds = %entry
+  %add = add nuw nsw i32 %rem, 7
----------------
Rin18 wrote:

Done

https://github.com/llvm/llvm-project/pull/67543