[llvm] [LV] Add initial legality checks for ee loops with stores (PR #145663)
David Sherwood via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 11 04:10:35 PDT 2025
================
@@ -405,5 +407,59 @@ exit:
ret void
}
+; Early-exit loop with a store, where the early-exit condition is loaded from
+; a data-dependent address: an i8 offset is loaded from %offsets[%iv],
+; zero-extended, and used as the i16 index into %pred.
+; The attribute sizes match the accesses in the body: 20 iterations of i16 in
+; %array (40 bytes), 20 iterations of i8 in %offsets (20 bytes), and at most
+; 256 distinct i16 slots in %pred (512 bytes) reachable through a zext'd i8.
+; NOTE(review): the expected remark is still "Loop may fault", presumably
+; because the indexed %pred address is not a simple function of %iv; confirm
+; against the legality code in the PR.
+define void @loop_contains_store_condition_load_requires_gather(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(512) readonly %pred, ptr align 1 dereferenceable(20) readonly %offsets) {
+; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_condition_load_requires_gather'
+; CHECK: LV: Not vectorizing: Loop may fault.
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ]
+; In-place increment of %array[%iv]; this store executes before the
+; early-exit branch below.
+ %st.addr = getelementptr inbounds nuw i16, ptr %array, i64 %iv
+ %data = load i16, ptr %st.addr, align 2
+ %inc = add nsw i16 %data, 1
+ store i16 %inc, ptr %st.addr, align 2
+; Early-exit condition: %pred[zext(%offsets[%iv])] > 500 (signed compare).
+ %offset.addr = getelementptr inbounds nuw i8, ptr %offsets, i64 %iv
+ %offset = load i8, ptr %offset.addr, align 1
+ %offset.zext = zext i8 %offset to i64
+ %ee.addr = getelementptr inbounds nuw i16, ptr %pred, i64 %offset.zext
+ %ee.val = load i16, ptr %ee.addr, align 2
+ %ee.cond = icmp sgt i16 %ee.val, 500
+ br i1 %ee.cond, label %exit, label %for.inc
+
+for.inc:
+; Counted exit: leave the loop once %iv.next reaches 20.
+ %iv.next = add nuw nsw i64 %iv, 1
+ %counted.cond = icmp eq i64 %iv.next, 20
+ br i1 %counted.cond, label %exit, label %for.body
+
+exit:
+ ret void
+}
+
+define void @loop_contains_store_uncounted_exit_is_a_switch(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(40) readonly %pred) {
----------------
david-arm wrote:
I think it would be good to have an additional vectoriser test (it doesn't have to be in this file) somewhere for loops like this:
```
// Aggregate of four int fields; the loop in foo() tests `a` for the early
// exit and sums `b`, `c` and `d`.
struct foo {
int a, b, c, d;
};
// Review sketch of an early-exit loop over a local buffer.
// Copies ((100 * 4) - 3) * 4 bytes from `src` into a 397-element int array
// (the trailing `* 4` is presumably sizeof(int) -- confirm), then runs up to
// 100 iterations, breaking as soon as `tmp.a > 3` and otherwise storing
// `tmp.b + tmp.c + tmp.d` to `dst[i]`.
void foo(int *dst, struct foo *src) {
int array[(100 * 4) - 3];
memcpy(array, src, ((100 * 4) - 3) * 4);
for (int i = 0; i < 100; i++) {
// NOTE(review): `array[i]` is an int, so initialising a `struct foo` from it
// is not valid C as written. Presumably the intent is to read the i-th
// struct-foo-sized group of `array` -- confirm with the author. If so, the
// buffer is 3 ints short of 100 whole structs: for i == 99 only `a`
// (int index 396) lies inside the 397-element array.
struct foo tmp = array[i];
if (tmp.a > 3)
break;
dst[i] = tmp.b + tmp.c + tmp.d;
}
}
```
In this example, I imagine this PR would consider such a loop legal to vectorise, but there are restrictions on how we vectorise it. Given that you are only checking whether the load of foo.a is dereferenceable, I don't think we should treat the load of struct foo as an interleave group and end up using a wider load, which could fault.
https://github.com/llvm/llvm-project/pull/145663
More information about the llvm-commits
mailing list