[PATCH] D38128: Handle COPYs of physregs better (regalloc hints)
Jonas Paulsson via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 13 05:42:50 PDT 2017
jonpa updated this revision to Diff 118901.
jonpa added a comment.
Herald added a subscriber: JDevlieghere.
BPF tests updated.
The generated code is slightly different, but it all seems equivalent to me:
alu8.ll:
--------
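master: left column (`<` marks lines present only on master)
patched: right column (`>` marks lines present only with the patch; `|` marks lines that differ)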
add: add:
# BB#0: # BB#0:
r1 += r2 <
r0 = r1 r0 = r1
> r0 += r2
exit exit
.globl and .globl and
.p2align .p2align
and: and:
# BB#0: # BB#0:
r1 &= r2 <
r0 = r1 r0 = r1
> r0 &= r2
exit exit
.globl bis .globl bis
.p2align .p2align
bis: bis:
# BB#0: # BB#0:
r1 |= r2 <
r0 = r1 r0 = r1
> r0 |= r2
exit exit
.globl xoran .globl xoran
.p2align .p2align
xorand: xorand:
# BB#0: # BB#0:
r2 ^= -1 <
r1 &= r2 <
r0 = r1 r0 = r1
> r2 ^= -1
> r0 &= r2
exit exit
.globl xor .globl xor
.p2align .p2align
xor: xor:
# BB#0: # BB#0:
r1 ^= r2 <
r0 = r1 r0 = r1
> r0 ^= r2
exit exit
basictest.ll:
-------------
test0: test0:
.cfi_startpro .cfi_startpro
# BB#0: # BB#0:
r1 += 1 <
r0 = r1 r0 = r1
> r0 += 1
exit exit
.cfi_endproc .cfi_endproc
cmp.ll:
-------
foo_cmp1: foo_cmp1:
.cfi_startpro .cfi_startpro
# BB#0: # BB#0:
if r2 s>= r1 | r0 = r2
> if r0 s>= r1
# BB#1: # BB#1:
r2 *= r1 | r0 *= r1
goto LBB0_3 goto LBB0_3
LBB0_2: LBB0_2:
r2 <<= 3 | r0 <<= 3
LBB0_3: LBB0_3:
r2 <<= 56 | r0 <<= 56
r2 s>>= 56 | r0 s>>= 56
r0 = r2 <
exit exit
foo_cmp2: foo_cmp2:
.cfi_startpro .cfi_startpro
# BB#0: # BB#0:
if r2 s> r1 g | r0 = r2
> if r0 s> r1 g
# BB#1: # BB#1:
r2 *= r1 | r0 *= r1
goto LBB1_3 goto LBB1_3
LBB1_2: LBB1_2:
r2 <<= 3 | r0 <<= 3
LBB1_3: LBB1_3:
r2 <<= 56 | r0 <<= 56
r2 s>>= 56 | r0 s>>= 56
r0 = r2 <
exit exit
foo_cmp3: foo_cmp3:
.cfi_startpro .cfi_startpro
# BB#0: # BB#0:
if r1 s>= r2 | r0 = r2
> if r1 s>= r0
# BB#1: # BB#1:
r2 *= r1 | r0 *= r1
goto LBB2_3 goto LBB2_3
LBB2_2: LBB2_2:
r2 <<= 3 | r0 <<= 3
LBB2_3: LBB2_3:
r2 <<= 56 | r0 <<= 56
r2 s>>= 56 | r0 s>>= 56
r0 = r2 <
exit exit
foo_cmp4: foo_cmp4:
.cfi_startpro .cfi_startpro
# BB#0: # BB#0:
if r1 s> r2 g | r0 = r2
> if r1 s> r0 g
# BB#1: # BB#1:
r2 *= r1 | r0 *= r1
goto LBB3_3 goto LBB3_3
LBB3_2: LBB3_2:
r2 <<= 3 | r0 <<= 3
LBB3_3: LBB3_3:
r2 <<= 56 | r0 <<= 56
r2 s>>= 56 | r0 s>>= 56
r0 = r2 <
exit exit
min: min:
.cfi_startpro .cfi_startpro
# BB#0: # BB#0:
if r2 s> r1 g | r0 = r1
> if r2 s> r0 g
# BB#1: # BB#1:
r1 = r2 | r0 = r2
LBB4_2: LBB4_2:
r0 = r1 <
exit exit
minu: minu:
.cfi_startpro .cfi_startpro
# BB#0: # BB#0:
r3 = 100 | r0 = r1
if r3 > r1 go | r1 = 100
> if r1 > r0 go
# BB#1: # BB#1:
r1 = r2 | r0 = r2
LBB5_2: LBB5_2:
r0 = r1 <
exit exit
max: max:
.cfi_startpro .cfi_startpro
# BB#0: # BB#0:
if r1 s> r2 g | r0 = r1
> if r0 s> r2 g
# BB#1: # BB#1:
r1 = r2 | r0 = r2
LBB6_2: LBB6_2:
r0 = r1 <
exit exit
meq: meq:
.cfi_startpro .cfi_startpro
# BB#0: # BB#0:
> r0 = r3
if r1 == r2 g if r1 == r2 g
# BB#1: # BB#1:
r3 = r1 | r0 = r1
LBB7_2: LBB7_2:
r0 = r3 <
exit exit
dwarfdump.ll:
-------------
Output differs:
testprog: # @te testprog: # @te
.Lfunc_begin0: .Lfunc_begin0:
.loc 1 2 0 # tes .loc 1 2 0 # tes
.cfi_sections .debug_frame .cfi_sections .debug_frame
.cfi_startproc .cfi_startproc
# BB#0: # BB#0:
#DEBUG_VALUE: testprog:myvar_a <- %R1 #DEBUG_VALUE: testprog:myvar_a <- %R1
#DEBUG_VALUE: testprog:myvar_a <- %R1 <
#DEBUG_VALUE: testprog:myvar_b <- %R2 <
#DEBUG_VALUE: testprog:myvar_b <- %R2 #DEBUG_VALUE: testprog:myvar_b <- %R2
.loc 1 5 27 prologue_end # tes | r0 = r2
r2 += r1 <
.Ltmp0: .Ltmp0:
> #DEBUG_VALUE: testprog:myvar_b <- %R0
> #DEBUG_VALUE: testprog:myvar_a <- %R1
> .loc 1 5 27 prologue_end # tes
> r0 += r1
> .Ltmp1:
.loc 1 5 19 is_stmt 0 # tes .loc 1 5 19 is_stmt 0 # tes
r1 = testprog.myvar_c ll r1 = testprog.myvar_c ll
.Ltmp1: | .Ltmp2:
r3 = *(u32 *)(r1 + 0) | r2 = *(u32 *)(r1 + 0)
.loc 1 7 27 is_stmt 1 # tes .loc 1 7 27 is_stmt 1 # tes
r2 += r3 | r0 += r2
.loc 1 7 17 is_stmt 0 # tes .loc 1 7 17 is_stmt 0 # tes
*(u32 *)(r1 + 0) = r2 | *(u32 *)(r1 + 0) = r0
.loc 1 9 9 is_stmt 1 # tes .loc 1 9 9 is_stmt 1 # tes
r0 = r2 <
exit exit
With the patch I get this line table:
Address Line Column File ISA Discriminator Flags
------------------ ------ ------ ------ --- ------------- -------------
0x0000000000000000 2 0 1 0 0 is_stmt
0x0000000000000008 5 27 1 0 0 is_stmt prologue_end
0x0000000000000010 5 19 1 0 0
0x0000000000000028 7 27 1 0 0 is_stmt
0x0000000000000030 7 17 1 0 0
0x0000000000000038 9 9 1 0 0 is_stmt
0x0000000000000040 9 9 1 0 0 is_stmt end_sequence
On master the line table is:
Address Line Column File ISA Discriminator Flags
------------------ ------ ------ ------ --- ------------- -------------
0x0000000000000000 2 0 1 0 0 is_stmt
0x0000000000000000 5 27 1 0 0 is_stmt prologue_end
0x0000000000000008 5 19 1 0 0
0x0000000000000020 7 27 1 0 0 is_stmt
0x0000000000000028 7 17 1 0 0
0x0000000000000030 9 9 1 0 0 is_stmt
0x0000000000000040 9 9 1 0 0 is_stmt end_sequence
intrinsics.ll:
--------------
bswap: # bswap: #
.cfi_startproc .cfi_startproc
# BB#0: # # BB#0: #
> r0 = r2 #
r1 = le64 r1 # r1 = le64 r1 #
r2 = le32 r2 # | r0 = le32 r0 #
r2 += r1 # | r0 += r1 #
r3 = le16 r3 # r3 = le16 r3 #
r2 += r3 # | r0 += r3 #
r0 = r2 # <
exit # exit #
objdump_intrinsics.ll:
----------------------
bswap:
r1 = le64 r1 | r0 = r2
r2 = le32 r2 | r1 = le64 r1
r2 += r1 | r0 = le32 r0
r3 = le16 r3 | r0 += r1
r2 += r3 | r3 = le16 r3
r0 = r2 | r0 += r3
exit exit
sanity.ll:
----------
foo_int: foo_int:
.cfi_startproc .cfi_startproc
# BB#0: # BB#0:
r2 += r1 <
r0 = r2 r0 = r2
> r0 += r1
exit exit
foo_char: foo_char:
.cfi_startproc .cfi_startproc
# BB#0: # BB#0:
r2 += r1 <
r2 <<= 56 <
r2 s>>= 56 <
r0 = r2 r0 = r2
> r0 += r1
> r0 <<= 56
> r0 s>>= 56
exit exit
foo_ll: foo_ll:
.cfi_startproc .cfi_startproc
# BB#0: # BB#0:
r2 += r1 <
r2 -= r3 <
r0 = r2 r0 = r2
> r0 += r1
> r0 -= r3
exit exit
foo_cmp: foo_cmp:
.cfi_startproc .cfi_startproc
# BB#0: # BB#0:
if r2 s> r1 goto LBB5_2 | r0 = r1
> if r2 s> r0 goto LBB5_2
# BB#1: # BB#1:
r1 = r2 | r0 = r2
LBB5_2: LBB5_2:
r0 = r1 <
exit exit
foo_muldiv: foo_muldiv:
.cfi_startproc .cfi_startproc
# BB#0: # BB#0:
> r0 = r2
if r1 == 0 goto LBB6_2 if r1 == 0 goto LBB6_2
# BB#1: # BB#1:
r2 *= r3 | r0 *= r3
goto LBB6_3 goto LBB6_3
LBB6_2: LBB6_2:
r3 <<= 32 r3 <<= 32
r3 >>= 32 r3 >>= 32
r4 <<= 32 r4 <<= 32
r4 >>= 32 r4 >>= 32
r4 /= r3 r4 /= r3
r2 = r4 | r0 = r4
LBB6_3: LBB6_3:
r0 = r2 <
exit exit
shifts.ll:
----------
.p2align 3 .p2align 3
lshr8: lshr8:
# BB#0: # BB#0:
r1 >>= r2 <
r1 &= 255 <
r0 = r1 r0 = r1
> r0 >>= r2
> r0 &= 255
exit exit
ashr8: ashr8:
# BB#0: # BB#0:
r1 s>>= r2 <
r0 = r1 r0 = r1
> r0 s>>= r2
exit exit
shl8: shl8:
# BB#0: # BB#0:
r1 <<= r2 <
r1 &= 255 <
r0 = r1 r0 = r1
> r0 <<= r2
> r0 &= 255
exit exit
lshr16: lshr16:
# BB#0: # BB#0:
r1 >>= r2 <
r1 &= 65535 <
r0 = r1 r0 = r1
> r0 >>= r2
> r0 &= 65535
exit exit
ashr16: ashr16:
# BB#0: # BB#0:
r1 s>>= r2 <
r0 = r1 r0 = r1
> r0 s>>= r2
exit exit
shl16: shl16:
# BB#0: # BB#0:
r1 <<= r2 <
r1 &= 65535 <
r0 = r1 r0 = r1
> r0 <<= r2
> r0 &= 65535
exit exit
lshr32: lshr32:
# BB#0: # BB#0:
r1 >>= r2 <
r1 <<= 32 <
r1 >>= 32 <
r0 = r1 r0 = r1
> r0 >>= r2
> r0 <<= 32
> r0 >>= 32
exit exit
ashr32: ashr32:
# BB#0: # BB#0:
r1 s>>= r2 <
r0 = r1 r0 = r1
> r0 s>>= r2
exit exit
shl32: shl32:
# BB#0: # BB#0:
r1 <<= r2 <
r1 <<= 32 <
r1 >>= 32 <
r0 = r1 r0 = r1
> r0 <<= r2
> r0 <<= 32
> r0 >>= 32
exit exit
lshr64: lshr64:
# BB#0: # BB#0:
r1 >>= r2 <
r0 = r1 r0 = r1
> r0 >>= r2
exit exit
ashr64: ashr64:
# BB#0: # BB#0:
r1 s>>= r2 <
r0 = r1 r0 = r1
> r0 s>>= r2
exit exit
shl64: shl64:
# BB#0: # BB#0:
r1 <<= r2 <
r0 = r1 r0 = r1
> r0 <<= r2
exit exit
https://reviews.llvm.org/D38128
Files:
include/llvm/CodeGen/MachineRegisterInfo.h
include/llvm/Target/TargetRegisterInfo.h
lib/CodeGen/CalcSpillWeights.cpp
lib/CodeGen/TargetRegisterInfo.cpp
test/CodeGen/AArch64/arm64-aapcs.ll
test/CodeGen/AArch64/func-argpassing.ll
test/CodeGen/AArch64/swifterror.ll
test/CodeGen/AArch64/win64_vararg.ll
test/CodeGen/AMDGPU/callee-special-input-sgprs.ll
test/CodeGen/AMDGPU/callee-special-input-vgprs.ll
test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.ll
test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll
test/CodeGen/AMDGPU/ret.ll
test/CodeGen/AMDGPU/sgpr-control-flow.ll
test/CodeGen/ARM/longMAC.ll
test/CodeGen/ARM/select_xform.ll
test/CodeGen/ARM/ssp-data-layout.ll
test/CodeGen/ARM/struct_byval_arm_t1_t2.ll
test/CodeGen/ARM/swifterror.ll
test/CodeGen/BPF/alu8.ll
test/CodeGen/BPF/basictest.ll
test/CodeGen/BPF/cmp.ll
test/CodeGen/BPF/dwarfdump.ll
test/CodeGen/BPF/intrinsics.ll
test/CodeGen/BPF/objdump_intrinsics.ll
test/CodeGen/BPF/sanity.ll
test/CodeGen/BPF/shifts.ll
test/CodeGen/SystemZ/call-03.ll
test/CodeGen/SystemZ/swift-return.ll
test/CodeGen/SystemZ/swifterror.ll
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D38128.118901.patch
Type: text/x-patch
Size: 77434 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20171013/2a915a26/attachment-0001.bin>
More information about the llvm-commits
mailing list