[PATCH] D67800: [InstCombine] Fold a shifty implementation of clamp (e.g., clamp255).
Huihui Zhang via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 20 17:58:14 PDT 2019
huihuiz added a comment.
Similar to D67799 <https://reviews.llvm.org/D67799>
--- Scalar Test ---
X86 target:
Test input; Run: clang -O2 -target x86_64 -march=skylake -S clamp255.ll -o -
define i32 @clamp255(i32 %v) {
%sub = sub nsw i32 255, %v
%shr = ashr i32 %sub, 31
%or = or i32 %shr, %v
%and = and i32 %or, 255
ret i32 %and
}
before:
clamp255: # @clamp255
# %bb.0:
movl $255, %eax
subl %edi, %eax
sarl $31, %eax
orl %edi, %eax
movzbl %al, %eax
retq
After this optimization
clamp255: # @clamp255
# %bb.0:
cmpl $256, %edi # imm = 0x100
movl $255, %eax
cmovll %edi, %eax
movzbl %al, %eax
retq
AArch64 target
Same input; Run : clang -O2 -target aarch64 -march=armv8a -S clamp255.ll -o -
before
clamp255: // @clamp255
// %bb.0:
mov w8, #255
sub w8, w8, w0
orr w8, w0, w8, asr #31
and w0, w8, #0xff
ret
After this optimization:
clamp255: // @clamp255
// %bb.0:
cmp w0, #255 // =255
mov w8, #255
csel w8, w0, w8, lt
and w0, w8, #0xff
ret
ARM target:
Same input; Run : clang -O2 -target arm -march=armv8.1a -S clamp255.ll -o -
before:
clamp255:
.fnstart
@ %bb.0:
rsb r1, r0, #255
orr r0, r0, r1, asr #31
uxtb r0, r0
bx lr
After this optimization:
clamp255:
.fnstart
@ %bb.0:
cmp r0, #255
movge r0, #255
uxtb r0, r0
bx lr
--- Vector Test ---
X86 target:
Test input; Run: clang -O2 -target x86_64 -march=skylake -S clamp255-vec.ll -o -
define <4 x i32> @clamp255(<4 x i32> %v) {
%sub = sub nsw <4 x i32> <i32 255, i32 255, i32 255, i32 255>, %v
%shr = ashr <4 x i32> %sub, <i32 31, i32 31, i32 31, i32 31>
%or = or <4 x i32> %shr, %v
%and = and <4 x i32> %or, <i32 255, i32 255, i32 255, i32 255>
ret <4 x i32> %and
}
before:
clamp255: # @clamp255
# %bb.0:
vpbroadcastd .LCPI0_0(%rip), %xmm1 # xmm1 = [255,255,255,255]
vpsubd %xmm0, %xmm1, %xmm1
vpsrad $31, %xmm1, %xmm1
vpor %xmm0, %xmm1, %xmm0
vpand .LCPI0_1(%rip), %xmm0, %xmm0
retq
After this optimization
clamp255: # @clamp255
# %bb.0:
vpbroadcastd .LCPI0_0(%rip), %xmm1 # xmm1 = [255,255,255,255]
vpminsd %xmm1, %xmm0, %xmm0
vpand .LCPI0_1(%rip), %xmm0, %xmm0
retq
AArch64 target:
Same input; Run : clang -O2 -target aarch64 -march=armv8a -S clamp255-vec.ll -o -
before
clamp255: // @clamp255
// %bb.0:
movi v1.2d, #0x0000ff000000ff
sub v2.4s, v1.4s, v0.4s
sshr v2.4s, v2.4s, #31
orr v0.16b, v2.16b, v0.16b
and v0.16b, v0.16b, v1.16b
ret
After this optimization
clamp255: // @clamp255
// %bb.0:
movi v1.2d, #0x0000ff000000ff
smin v0.4s, v0.4s, v1.4s
and v0.16b, v0.16b, v1.16b
ret
ARM target
Same input; Run : clang -target arm-arm-none-eabi -mcpu=cortex-a57 -mfpu=neon-fp-armv8 -O2 -S clamp255-vec.ll -o -
before
clamp255:
.fnstart
vmov d17, r2, r3
vmov d16, r0, r1
vmov.i32 q9, #0xff
vsub.i32 q10, q9, q8
vshr.s32 q10, q10, #31
vorr q8, q10, q8
vand q8, q8, q9
vmov r0, r1, d16
vmov r2, r3, d17
bx lr
After this optimization
clamp255:
.fnstart
vmov d17, r2, r3
vmov d16, r0, r1
vmov.i32 q9, #0xff
vmin.s32 q8, q8, q9
vand q8, q8, q9
vmov r0, r1, d16
vmov r2, r3, d17
bx lr
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D67800/new/
https://reviews.llvm.org/D67800
More information about the llvm-commits
mailing list