[llvm] 46541a3 - [ARM] Add an extra MVE low-trip-count loop. NFC
David Green via llvm-commits
llvm-commits at lists.llvm.org
Thu May 23 13:50:52 PDT 2024
Author: David Green
Date: 2024-05-23T21:50:47+01:00
New Revision: 46541a363695b9014cd95267fa78592be752d64d
URL: https://github.com/llvm/llvm-project/commit/46541a363695b9014cd95267fa78592be752d64d
DIFF: https://github.com/llvm/llvm-project/commit/46541a363695b9014cd95267fa78592be752d64d.diff
LOG: [ARM] Add an extra MVE low-trip-count loop. NFC
This makes use of half floats, which makes the masked stores expensive.
Added:
Modified:
llvm/test/Transforms/LoopVectorize/ARM/mve-known-trip-count.ll
Removed:
################################################################################
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-known-trip-count.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-known-trip-count.ll
index c3e30f1f81f4f..e796e40a7591e 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/mve-known-trip-count.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-known-trip-count.ll
@@ -592,7 +592,41 @@ define dso_local i32 @predicated_test(i32 noundef %0, ptr %glob) #0 {
ret i32 0
}
+; This has a maximum trip count of 4. The codegen is currently much better with <8 x half> vectorization.
+; CHECK-LABEL: arm_q15_to_f16_remainder
+; CHECK: LV: Selecting VF: 8
+define void @arm_q15_to_f16_remainder(ptr nocapture noundef readonly %pSrc, ptr nocapture noundef writeonly noalias %pDst, i32 noundef %blockSize) #0 { ; Converts (%blockSize & 3) i16 elements read from %pSrc to half, scales each by 2^-15, and stores them to %pDst.
+entry:
+ %rem = and i32 %blockSize, 3 ; element count = %blockSize & 3, i.e. a value in 0..3
+ %cmp.not5 = icmp eq i32 %rem, 0 ; true when there is nothing to convert
+ br i1 %cmp.not5, label %while.end, label %while.body.preheader ; skip the loop entirely when %rem == 0
+
+while.body.preheader: ; preds = %entry
+ br label %while.body
+
+while.body: ; preds = %while.body.preheader, %while.body
+ %blkCnt.08 = phi i32 [ %dec, %while.body ], [ %rem, %while.body.preheader ] ; elements left; starts at %rem (non-zero here), so the body runs 1..3 times
+ %pIn.07 = phi ptr [ %incdec.ptr, %while.body ], [ %pSrc, %while.body.preheader ] ; address of the current source element
+ %pDst.addr.06 = phi ptr [ %incdec.ptr2, %while.body ], [ %pDst, %while.body.preheader ] ; address of the current destination element
+ %incdec.ptr = getelementptr inbounds i8, ptr %pIn.07, i32 2 ; next source address: +2 bytes (one i16)
+ %0 = load i16, ptr %pIn.07, align 2 ; read the current 16-bit input value
+ %conv1 = sitofp i16 %0 to half ; signed integer -> half conversion
+ %1 = fmul fast half %conv1, 0xH0200 ; 0xH0200 is the half bit pattern for 2^-15 (a subnormal), so this scales by 1/32768
+ %incdec.ptr2 = getelementptr inbounds i8, ptr %pDst.addr.06, i32 2 ; next destination address: +2 bytes (one half)
+ store half %1, ptr %pDst.addr.06, align 2 ; write the scaled result
+ %dec = add nsw i32 %blkCnt.08, -1 ; one fewer element remaining
+ %cmp.not = icmp eq i32 %dec, 0 ; true once every element has been handled
+ br i1 %cmp.not, label %while.end.loopexit, label %while.body ; leave the loop when the count reaches zero, otherwise iterate again
+
+while.end.loopexit: ; preds = %while.body
+ br label %while.end
+
+while.end: ; preds = %while.end.loopexit, %entry
+ ret void
+}
+
+
declare void @llvm.lifetime.start.p0(i64, ptr)
declare void @llvm.lifetime.end.p0(i64, ptr)
-attributes #0 = { "target-features"="+mve" }
+attributes #0 = { "target-features"="+mve.fp" }
More information about the llvm-commits mailing list