[llvm-bugs] [Bug 51193] New: Missed opportunities for register promotion
via llvm-bugs
llvm-bugs at lists.llvm.org
Fri Jul 23 12:56:45 PDT 2021
https://bugs.llvm.org/show_bug.cgi?id=51193
Bug ID: 51193
Summary: Missed opportunities for register promotion
Product: libraries
Version: trunk
Hardware: PC
OS: Windows NT
Status: NEW
Severity: enhancement
Priority: P
Component: Scalar Optimizations
Assignee: unassignedbugs at nondot.org
Reporter: momchil.velikov at arm.com
CC: llvm-bugs at lists.llvm.org
Considering this example (https://gcc.godbolt.org/z/55dfPrc8T), compiled with
`clang -target aarch64-linux -Ofast`
int u, v;
void f(int a[restrict], int b[restrict], int n) {
for (int i = 0; i < n; ++i) {
if (a[i]) {
++u;
break;
}
++u;
if (b[i])
++v;
}
}
Clang emits
f: // @f
cmp w2, #1 // =1
b.lt .LBB0_7
adrp x8, u
ldr w10, [x8, :lo12:u]
mov w9, w2
add w12, w10, #1 // =1
adrp x10, v
b .LBB0_3
.LBB0_2: // in Loop: Header=BB0_3 Depth=1
add w12, w11, #1 // =1
add x1, x1, #4 // =4
subs x9, x9, #1 // =1
add x0, x0, #4 // =4
b.eq .LBB0_6
.LBB0_3: // =>This Inner Loop Header: Depth=1
ldr w13, [x0]
mov w11, w12
cbnz w13, .LBB0_6
ldr w12, [x1]
cbz w12, .LBB0_2
ldr w12, [x10, :lo12:v]
add w12, w12, #1 // =1
str w12, [x10, :lo12:v]
b .LBB0_2
.LBB0_6:
str w11, [x8, :lo12:u]
.LBB0_7:
ret
where the updates to `u` are performed with a single store instruction after
the loop, in the case where the loop body is executed at least once. In contrast,
updates to `v` are performed on each loop iteration, since moving them outside
the loop may introduce traps or data races.
In the output for the same code compiled with GCC with `-Ofast`, even the updates
to `v` are moved out of the loop, since `-Ofast` enables
`-fallow-store-data-races`. But even when compiled with `-O2`, the
updates to `v` are still moved outside the loop, except that the store that
writes to `v` is conditional, i.e.:
f:
cmp w2, 0
ble .L1
adrp x10, .LANCHOR0
add x11, x10, :lo12:.LANCHOR0
mov x3, 0
mov w8, 0
ldr w9, [x10, #:lo12:.LANCHOR0]
mov w7, 0
ldr w6, [x11, 4]
add w5, w2, w9
mov w2, w9
b .L7
.L3:
ldr w4, [x1, x3]
add w2, w2, 1
add x3, x3, 4
mov w7, 1
cbz w4, .L6
add w6, w6, w7
mov w8, w7
.L6:
cmp w2, w5
beq .L22
.L7:
ldr w4, [x0, x3]
cbz w4, .L3
cbz w8, .L4
str w6, [x11, 4]
.L4:
cmp w7, 0
csel w9, w9, w2, eq
add w9, w9, 1
str w9, [x10, #:lo12:.LANCHOR0]
.L1:
ret
.L22:
cbz w8, .L8
str w6, [x11, 4]
.L8:
str w2, [x10, #:lo12:.LANCHOR0]
ret
Clang/LLVM could take advantage of a command line option, or a function or variable
attribute, that allows transformations that could potentially introduce data
races. That sounds reasonable for single-threaded programs or for parts of the code
where the absence of data races is guaranteed at a different level of abstraction.
Even without introducing violations of the C/C++/LLVM memory model, LLVM could
hoist the load, e.g. transform
for ... {
...
if cond {
x0 = *a
x1 = x0 + 1
*a = x1
}
...
}
into
x0 = *a
for ... {
...
x1 = phi(x0, x3)
if cond {
x2 = x1 + 1
*a = x2
}
x3 = phi(x1, x2)
...
}
if `a` is determined to be dereferenceable.
--
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20210723/aa9cc27e/attachment-0001.html>
More information about the llvm-bugs
mailing list