[PATCH] D150388: [CodeGen]Allow targets to use target specific COPY instructions for live range splitting
Alexander Kornienko via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 18 15:45:40 PDT 2023
alexfh added a comment.
I'm not sure this llvm-reduce'd IR snippet retains the essence of the problem, but maybe you could look at it and see if there's an obvious issue with the codegen?
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Function Attrs: cold noreturn nounwind
declare void @llvm.ubsantrap(i8 immarg) #0
declare i1 @_f1()
declare { i64, i64 } @_f2(ptr)
declare { i64, i8 } @_f3()
declare void @_f4()
; Reproducer function. Value and block names are auto-numbered, so the comments
; below only describe the control flow that is visible in this body.
;   - %0 and %1 are the pointers that get stored through; %3 is forwarded to @_f2.
;   - %5, %6 and %8 are i1 branch conditions; %7 is only compared against 0.
;   - %2 and %4 are never referenced in the body.
; Attribute group #1 sets "frame-pointer"="all".
define fastcc void @_f(ptr %0, ptr %1, i64 %2, ptr %3, ptr %4, i1 %5, i1 %6, i24 %7, i1 %8) #1 {
; Entry block (%9): the result of @_f1 is unused. If %7 == 0 control enters the
; loop at %13, otherwise it goes to %12.
%10 = call i1 @_f1()
%11 = icmp eq i24 %7, 0
br i1 %11, label %13, label %12
; %12: call @_f4 and return through the shared exit block.
12: ; preds = %9
call void @_f4()
br label %common.ret
; Shared return block.
common.ret: ; preds = %22, %18, %12
ret void
; %13: loop header. %14 is the loop-carried i40 value: undef when arriving from
; the entry block, %21 when arriving via the back edge from %20.
13: ; preds = %20, %9
%14 = phi i40 [ undef, %9 ], [ %21, %20 ]
br i1 %6, label %15, label %16
; %15: reached when %6 is true; calls llvm.ubsantrap and does not continue.
15: ; preds = %13
call void @llvm.ubsantrap(i8 0)
unreachable
; %16: call @_f2 on %3 (result unused); when %5 is true, skip the stores in %18.
16: ; preds = %13
%17 = call { i64, i64 } @_f2(ptr %3)
br i1 %5, label %20, label %18
; %18: keep only the low 32 bits of %14 (mask 4294967295 == 0xFFFFFFFF), store
; null through %0 and the masked i40 through %1, then leave via common.ret when
; %8 is true.
18: ; preds = %16
%19 = and i40 %14, 4294967295
store ptr null, ptr %0, align 8
store i40 %19, ptr %1, align 4
br i1 %8, label %common.ret, label %20
; %20: loop latch. %21 is %14 unchanged when coming from %16, or the masked %19
; when coming from %18. Exit to %22 when %8 is true, otherwise branch back to %13.
20: ; preds = %18, %16
%21 = phi i40 [ %14, %16 ], [ %19, %18 ]
br i1 %8, label %22, label %13
; %22: store null through %0, call @_f3 (result unused), then reload from %0
; after the call and test the loaded pointer against null.
22: ; preds = %20
store ptr null, ptr %0, align 8
%23 = call { i64, i8 } @_f3()
%24 = load ptr, ptr %0, align 8
%25 = icmp eq ptr %24, null
br i1 %25, label %26, label %common.ret
; %26: reached when the reloaded pointer is null; performs a volatile i32 store
; of 0 to the null address and is marked unreachable afterwards.
26: ; preds = %22
store volatile i32 0, ptr null, align 4294967296
unreachable
}
attributes #0 = { cold noreturn nounwind }
attributes #1 = { "frame-pointer"="all" }
The difference in the generated x86-64 assembly (with `clang -O1 -S`, before this patch and after it) is as follows:
.text
.file "reduced.ll"
.globl _f # -- Begin function _f
.p2align 4, 0x90
.type _f,@function
_f: # @_f
.cfi_startproc
# %bb.0:
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
pushq %r15
pushq %r14
pushq %r13
pushq %r12
pushq %rbx
subq $24, %rsp
.cfi_offset %rbx, -56
.cfi_offset %r12, -48
.cfi_offset %r13, -40
.cfi_offset %r14, -32
.cfi_offset %r15, -24
movl %r9d, %r14d
- movq %rcx, %r12
- movq %rsi, %r15
- movq %rdi, -48(%rbp) # 8-byte Spill
+ movq %rcx, %r15
+ movq %rsi, %r12
+ movq %rdi, %rbx
movzbl 32(%rbp), %r13d
- movzbl 16(%rbp), %ebx
+ movzbl 16(%rbp), %eax
+ movb %al, -41(%rbp) # 1-byte Spill
callq _f1@PLT
testl $16777215, 24(%rbp) # imm = 0xFFFFFF
je .LBB0_1
-# %bb.5:
+# %bb.9:
addq $24, %rsp
popq %rbx
popq %r12
popq %r13
popq %r14
popq %r15
popq %rbp
.cfi_def_cfa %rsp, 8
jmp _f4@PLT # TAILCALL
.LBB0_1: # %.preheader
.cfi_def_cfa %rbp, 16
- movq %r15, -56(%rbp) # 8-byte Spill
- movl %r14d, %r15d
- movq -48(%rbp), %r14 # 8-byte Reload
- testb $1, %bl
- jne .LBB0_7
+ movq %rbx, -56(%rbp) # 8-byte Spill
+ testb $1, -41(%rbp) # 1-byte Folded Reload
+ jne .LBB0_11
# %bb.2: # %.preheader.split.preheader
# implicit-def: $rbx
jmp .LBB0_3
.p2align 4, 0x90
-.LBB0_4: # in Loop: Header=BB0_3 Depth=1
- movq %r14, %rax
+.LBB0_6: # in Loop: Header=BB0_3 Depth=1
testb $1, %r13b
- jne .LBB0_11
+ jne .LBB0_7
.LBB0_3: # %.preheader.split
# =>This Inner Loop Header: Depth=1
- movq %r12, %rdi
+ movq %r15, %rdi
callq _f2@PLT
- testb $1, %r15b
- jne .LBB0_4
-# %bb.8: # in Loop: Header=BB0_3 Depth=1
- movq $0, (%r14)
- movq -56(%rbp), %rcx # 8-byte Reload
- movl %ebx, (%rcx)
- movb $0, 4(%rcx)
- testb $1, %r13b
+ testb $1, %r14b
jne .LBB0_6
-# %bb.9: # in Loop: Header=BB0_3 Depth=1
- movq %r14, %rax
- movl %ebx, %ebx
- testb $1, %r13b
- je .LBB0_3
-.LBB0_11:
+# %bb.4: # in Loop: Header=BB0_3 Depth=1
+ movq -56(%rbp), %rax # 8-byte Reload
movq $0, (%rax)
- movq %rax, %rbx
+ movl %ebx, (%r12)
+ movb $0, 4(%r12)
+ testb $1, %r13b
+ jne .LBB0_10
+# %bb.5: # in Loop: Header=BB0_3 Depth=1
+ movl %ebx, %ebx
+ jmp .LBB0_6
+.LBB0_7:
+ movq -56(%rbp), %rbx # 8-byte Reload
+ movq $0, (%rbx)
callq _f3@PLT
cmpq $0, (%rbx)
- je .LBB0_12
-.LBB0_6: # %common.ret
+ je .LBB0_8
+.LBB0_10: # %common.ret
addq $24, %rsp
popq %rbx
popq %r12
popq %r13
popq %r14
popq %r15
popq %rbp
.cfi_def_cfa %rsp, 8
retq
-.LBB0_7:
+.LBB0_11:
.cfi_def_cfa %rbp, 16
ud1l (%eax), %eax
-.LBB0_12:
+.LBB0_8:
movl $0, 0
.Lfunc_end0:
.size _f, .Lfunc_end0-_f
.cfi_endproc
# -- End function
.section ".note.GNU-stack","",@progbits
.addrsig
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D150388/new/
https://reviews.llvm.org/D150388
More information about the llvm-commits
mailing list