[llvm-bugs] [Bug 43539] New: #pragma clang loop vectorize(disable) should actually disable vectorization
via llvm-bugs
llvm-bugs at lists.llvm.org
Wed Oct 2 16:45:14 PDT 2019
https://bugs.llvm.org/show_bug.cgi?id=43539
Bug ID: 43539
Summary: #pragma clang loop vectorize(disable) should actually
disable vectorization
Product: clang
Version: trunk
Hardware: Other
OS: Linux
Status: NEW
Severity: enhancement
Priority: P
Component: LLVM Codegen
Assignee: unassignedclangbugs at nondot.org
Reporter: husseydevin at gmail.com
CC: llvm-bugs at lists.llvm.org, neeilans at live.com,
richard-llvm at metafoo.co.uk
**#pragma clang loop vectorize(disable) is useless.**
It only takes effect on tiny loops; anything moderately complicated is
vectorized regardless (I'm guessing that the cost model overrides the pragma).
For example:
#include <stdint.h>
#include <stddef.h>
uint64_t some_func(uint64_t *p, size_t len)
{
uint64_t sum1 = 0, sum2 = 0;
len &= ~(size_t)3; // always multiple of 4
#pragma clang loop vectorize(disable)
for (size_t i = 0; i < len; i++) {
sum1 += *p++ ^ 12345;
sum2 += *p++ ^ 12345;
}
return sum1 ^ sum2;
}
For aarch64 with -O3, I'd expect scalar code like the following (this is the
actual output with -march=armv8-a+nosimd):
some_func:
// %bb.0:
mov x8, xzr
ands x10, x1, #0xfffffffffffffffc
b.eq .LBB0_4
// %bb.1:
mov x9, xzr
mov x11, xzr
mov x12, xzr
add x13, x0, #16
mov w14, #12345
.LBB0_2:
ldp x15, x16, [x13, #-16]
ldp x17, x0, [x13], #32
subs x10, x10, #2
eor x15, x15, x14
eor x17, x17, x14
eor x16, x16, x14
eor x0, x0, x14
add x11, x15, x11
add x12, x17, x12
add x8, x16, x8
add x9, x0, x9
b.ne .LBB0_2
// %bb.3:
add x10, x12, x11
add x8, x9, x8
.LBB0_4:
eor x0, x8, x10
ret
However, I get this, a clearly vectorized loop:
some_func:
// %bb.0:
ands x8, x1, #0xfffffffffffffffc
b.eq .LBB0_4
// %bb.1:
mov w10, #12345
add x9, x0, #16
movi v0.2d, #0000000000000000
dup v1.2d, x10
movi v2.2d, #0000000000000000
.LBB0_2:
ldp q3, q4, [x9, #-16]
subs x8, x8, #2
add x9, x9, #32
eor v3.16b, v3.16b, v1.16b
eor v4.16b, v4.16b, v1.16b
add v2.2d, v3.2d, v2.2d
add v0.2d, v4.2d, v0.2d
b.ne .LBB0_2
// %bb.3:
add v0.2d, v0.2d, v2.2d
fmov x8, d0
mov x9, v0.d[1]
eor x0, x9, x8
ret
.LBB0_4:
movi v0.2d, #0000000000000000
fmov x8, d0
mov x9, v0.d[1]
eor x0, x9, x8
ret
Similarly vectorized code is generated for SSE2, 32-bit NEON, etc.
#pragma clang loop vectorize(disable) needs to completely shut off
vectorization for the loop it is applied to.
Need I say more?
--
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20191002/92741678/attachment.html>
More information about the llvm-bugs mailing list