[PATCH] D150388: [CodeGen]Allow targets to use target specific COPY instructions for live range splitting

Matt Arsenault via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 19 18:31:52 PDT 2023


arsenm added a comment.

In D150388#4515524 <https://reviews.llvm.org/D150388#4515524>, @vitalybuka wrote:

> My repro:
>
>   ; ModuleID = '<bc file>'
>   target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
>   target triple = "x86_64-grtev4-linux-gnu"
>   
>   %"struct.devtools::inliner::CallArg2.2307.4010.6850.7702.9690.1453.4566.8245.24575.24695.24965.25235.25445.26645.26877.26937.27207.27387.27673.27761.27849.27893.27915.27959.27981.28003.28016.28068.28120.28133.28185.28237.28250.50.102.115" = type { %"class.std::__u::optional.2306.4009.6849.7701.9689.1452.4565.8244.24574.24694.24964.25234.25444.26644.26876.26936.27206.27386.27672.27760.27848.27892.27914.27958.27980.28002.28015.28067.28119.28132.28184.28236.28249.49.101.114" }
>   %"class.std::__u::optional.2306.4009.6849.7701.9689.1452.4565.8244.24574.24694.24964.25234.25444.26644.26876.26936.27206.27386.27672.27760.27848.27892.27914.27958.27980.28002.28015.28067.28119.28132.28184.28236.28249.49.101.114" = type { %"struct.std::__u::__optional_move_assign_base.base.2305.4008.6848.7700.9688.1451.4564.8243.24573.24693.24963.25233.25443.26643.26875.26935.27205.27385.27671.27759.27847.27891.27913.27957.27979.28001.28014.28066.28118.28131.28183.28235.28248.48.100.113", [3 x i8] }
>   %"struct.std::__u::__optional_move_assign_base.base.2305.4008.6848.7700.9688.1451.4564.8243.24573.24693.24963.25233.25443.26643.26875.26935.27205.27385.27671.27759.27847.27891.27913.27957.27979.28001.28014.28066.28118.28131.28183.28235.28248.48.100.113" = type { %"struct.std::__u::__optional_copy_assign_base.base.2304.4007.6847.7699.9687.1450.4563.8242.24572.24692.24962.25232.25442.26642.26874.26934.27204.27384.27670.27758.27846.27890.27912.27956.27978.28000.28013.28065.28117.28130.28182.28234.28247.47.99.112" }
>   %"struct.std::__u::__optional_copy_assign_base.base.2304.4007.6847.7699.9687.1450.4563.8242.24572.24692.24962.25232.25442.26642.26874.26934.27204.27384.27670.27758.27846.27890.27912.27956.27978.28000.28013.28065.28117.28130.28182.28234.28247.47.99.112" = type { %"struct.std::__u::__optional_move_base.base.2303.4006.6846.7698.9686.1449.4562.8241.24571.24691.24961.25231.25441.26641.26873.26933.27203.27383.27669.27757.27845.27889.27911.27955.27977.27999.28012.28064.28116.28129.28181.28233.28246.46.98.111" }
>   %"struct.std::__u::__optional_move_base.base.2303.4006.6846.7698.9686.1449.4562.8241.24571.24691.24961.25231.25441.26641.26873.26933.27203.27383.27669.27757.27845.27889.27911.27955.27977.27999.28012.28064.28116.28129.28181.28233.28246.46.98.111" = type { %"struct.std::__u::__optional_copy_base.base.2302.4005.6845.7697.9685.1448.4561.8240.24570.24690.24960.25230.25440.26640.26872.26932.27202.27382.27668.27756.27844.27888.27910.27954.27976.27998.28011.28063.28115.28128.28180.28232.28245.45.97.110" }
>   %"struct.std::__u::__optional_copy_base.base.2302.4005.6845.7697.9685.1448.4561.8240.24570.24690.24960.25230.25440.26640.26872.26932.27202.27382.27668.27756.27844.27888.27910.27954.27976.27998.28011.28063.28115.28128.28180.28232.28245.45.97.110" = type { %"struct.std::__u::__optional_storage_base.base.2301.4004.6844.7696.9684.1447.4560.8239.24569.24689.24959.25229.25439.26639.26871.26931.27201.27381.27667.27755.27843.27887.27909.27953.27975.27997.28010.28062.28114.28127.28179.28231.28244.44.96.109" }
>   %"struct.std::__u::__optional_storage_base.base.2301.4004.6844.7696.9684.1447.4560.8239.24569.24689.24959.25229.25439.26639.26871.26931.27201.27381.27667.27755.27843.27887.27909.27953.27975.27997.28010.28062.28114.28127.28179.28231.28244.44.96.109" = type { %"struct.std::__u::__optional_destruct_base.base.2300.4003.6843.7695.9683.1446.4559.8238.24568.24688.24958.25228.25438.26638.26870.26930.27200.27380.27666.27754.27842.27886.27908.27952.27974.27996.28009.28061.28113.28126.28178.28230.28243.43.95.108" }
>   %"struct.std::__u::__optional_destruct_base.base.2300.4003.6843.7695.9683.1446.4559.8238.24568.24688.24958.25228.25438.26638.26870.26930.27200.27380.27666.27754.27842.27886.27908.27952.27974.27996.28009.28061.28113.28126.28178.28230.28243.43.95.108" = type { %union.anon.40.2299.4002.6842.7694.9682.1445.4558.8237.24567.24687.24957.25227.25437.26637.26869.26929.27199.27379.27665.27753.27841.27885.27907.27951.27973.27995.28008.28060.28112.28125.28177.28229.28242.42.94.107, i8 }
>   %union.anon.40.2299.4002.6842.7694.9682.1445.4558.8237.24567.24687.24957.25227.25437.26637.26869.26929.27199.27379.27665.27753.27841.27885.27907.27951.27973.27995.28008.28060.28112.28125.28177.28229.28242.42.94.107 = type { %"class.clang::CharSourceRange.2289.3992.6832.7684.9672.1435.4548.8227.24566.24686.24956.25226.25436.26636.26868.26928.27198.27378.27664.27752.27840.27884.27906.27950.27972.27994.28007.28059.28111.28124.28176.28228.28241.41.93.106" }
>   %"class.clang::CharSourceRange.2289.3992.6832.7684.9672.1435.4548.8227.24566.24686.24956.25226.25436.26636.26868.26928.27198.27378.27664.27752.27840.27884.27906.27950.27972.27994.28007.28059.28111.28124.28176.28228.28241.41.93.106" = type <{ %"class.clang::SourceRange.2288.3991.6831.7683.9671.1434.4547.8226.24565.24685.24955.25225.25435.26635.26867.26927.27197.27377.27663.27751.27839.27883.27905.27949.27971.27993.28006.28058.28110.28123.28175.28227.28240.40.92.105", i8, [3 x i8] }>
>   %"class.clang::SourceRange.2288.3991.6831.7683.9671.1434.4547.8226.24565.24685.24955.25225.25435.26635.26867.26927.27197.27377.27663.27751.27839.27883.27905.27949.27971.27993.28006.28058.28110.28123.28175.28227.28240.40.92.105" = type { %"class.clang::SourceLocation.2287.3990.6830.7682.9670.1433.4546.8225.24555.24675.24945.25215.25425.26625.26857.26917.27187.27367.27653.27741.27829.27873.27895.27939.27961.27983.28005.28057.28109.28122.28174.28226.28239.39.91.104", %"class.clang::SourceLocation.2287.3990.6830.7682.9670.1433.4546.8225.24555.24675.24945.25215.25425.26625.26857.26917.27187.27367.27653.27741.27829.27873.27895.27939.27961.27983.28005.28057.28109.28122.28174.28226.28239.39.91.104" }
>   %"class.clang::SourceLocation.2287.3990.6830.7682.9670.1433.4546.8225.24555.24675.24945.25215.25425.26625.26857.26917.27187.27367.27653.27741.27829.27873.27895.27939.27961.27983.28005.28057.28109.28122.28174.28226.28239.39.91.104" = type { i32 }
>   %"struct.std::__u::__optional_destruct_base.2555.4258.7098.7950.9938.1701.4814.8493.24576.24696.24966.25236.25446.26646.26885.26945.27215.27395.27674.27762.27850.27894.27916.27960.27982.28004.28017.28069.28121.28134.28186.28238.28251.51.103.116" = type { %union.anon.40.2299.4002.6842.7694.9682.1445.4558.8237.24567.24687.24957.25227.25437.26637.26869.26929.27199.27379.27665.27753.27841.27885.27907.27951.27973.27995.28008.28060.28112.28125.28177.28229.28242.42.94.107, i8, [3 x i8] }
>   
>   ; Function Attrs: noinline
>   define void @_ZN8devtools7inliner14ParseCallArgs3ERKN5clang8CallExprERKNS1_12FunctionDeclERNS1_10ASTContextE(ptr %0, ptr %1, i40 %2, ptr %3, i32 %4) #0 {
>     br label %9
>   
>   6:                                                ; preds = %21, %18
>     %.sroa.0.0 = phi ptr [ %24, %21 ], [ null, %18 ]
>     %.sroa.5.0 = phi ptr [ %25, %21 ], [ null, %18 ]
>     %7 = add i32 %10, 1
>     %8 = icmp eq i32 %10, %4
>     br i1 %8, label %27, label %9
>   
>   9:                                                ; preds = %6, %5
>     %.sroa.5.1 = phi ptr [ null, %5 ], [ %.sroa.5.0, %6 ]
>     %10 = phi i32 [ 0, %5 ], [ %7, %6 ]
>     %11 = phi i40 [ undef, %5 ], [ %19, %6 ]
>     %12 = call ptr @_ZN5clang4Expr27IgnoreUnlessSpelledInSourceEv()
>     %13 = load i8, ptr %1, align 8
>     %14 = icmp ult i8 %13, -5
>     %15 = and i40 %11, 4294967295
>     br i1 %14, label %18, label %16
>   
>   16:                                               ; preds = %9
>     %17 = load volatile { i64, i64 }, ptr null, align 4294967296
>     br label %18
>   
>   18:                                               ; preds = %16, %9
>     %19 = phi i40 [ %15, %9 ], [ %2, %16 ]
>     %20 = icmp ugt ptr %.sroa.5.1, %0
>     br i1 %20, label %6, label %21
>   
>   21:                                               ; preds = %18
>     %22 = icmp eq ptr %.sroa.5.1, null
>     %23 = zext i1 %22 to i64
>     %24 = call ptr @_Znwm(i64 0)
>     %25 = getelementptr %"struct.devtools::inliner::CallArg2.2307.4010.6850.7702.9690.1453.4566.8245.24575.24695.24965.25235.25445.26645.26877.26937.27207.27387.27673.27761.27849.27893.27915.27959.27981.28003.28016.28068.28120.28133.28185.28237.28250.50.102.115", ptr %3, i64 %23
>     %26 = getelementptr i8, ptr %24, i64 8
>     store i40 %19, ptr %26, align 4
>     br label %6
>   
>   27:                                               ; preds = %6
>     %28 = getelementptr %"struct.std::__u::__optional_destruct_base.2555.4258.7098.7950.9938.1701.4814.8493.24576.24696.24966.25236.25446.26646.26885.26945.27215.27395.27674.27762.27850.27894.27916.27960.27982.28004.28017.28069.28121.28134.28186.28238.28251.51.103.116", ptr %.sroa.0.0, i64 0, i32 1
>     %29 = load i8, ptr %28, align 4
>     %30 = icmp eq i8 %29, 0
>     br i1 %30, label %32, label %31
>   
>   31:                                               ; preds = %27
>     call void @__ubsan_handle_load_invalid_value_abort(ptr %0)
>     unreachable
>   
>   32:                                               ; preds = %27
>     ret void
>   
>   ; uselistorder directives
>     uselistorder i32 %10, { 1, 0 }
>   }
>   
>   define void @_ZN8devtools7inliner14ParseCallArgs2ERKN5clang8CallExprERKNS1_12FunctionDeclERNS1_10ASTContextE(ptr %0, ptr %1) {
>     call void @_ZN8devtools7inliner14ParseCallArgs3ERKN5clang8CallExprERKNS1_12FunctionDeclERNS1_10ASTContextE(ptr %1, ptr %0, i40 0, ptr null, i32 0)
>     ret void
>   }
>   
>   declare ptr @_ZN5clang4Expr27IgnoreUnlessSpelledInSourceEv()
>   
>   declare void @__ubsan_handle_load_invalid_value_abort(ptr)
>   
>   declare ptr @_Znwm(i64)
>   
>   attributes #0 = { noinline "frame-pointer"="all" }
>
> Before this patch, __ubsan_handle_load_invalid_value_abort was not reached; now it is.
>
> llc ./llvm-reduce-42f916.ll -O3 -o ./llvm-reduce-42f916.ll.<revision>.s
> diff -u --color ./llvm-reduce-42f916.ll.eb98abab2c83.s ./llvm-reduce-42f916.ll.b7836d856206.s (eb98abab2c83 <https://reviews.llvm.org/rGeb98abab2c83c9c101c4749c93836108657d6164>, b7836d856206 <https://reviews.llvm.org/rGb7836d856206ec39509d42529f958c920368166b>):
>
>   --- ./llvm-reduce-42f916.ll.eb98abab2c83.s	2023-07-19 10:38:05.571863413 -0700
>   +++ ./llvm-reduce-42f916.ll.b7836d856206.s	2023-07-19 10:39:47.587632219 -0700
>   @@ -23,14 +23,14 @@
>    	.cfi_offset %r14, -32
>    	.cfi_offset %r15, -24
>    	movl	%r8d, %r14d
>   -	movq	%rcx, -64(%rbp)                 # 8-byte Spill
>   -	movq	%rdx, -56(%rbp)                 # 8-byte Spill
>   -	movq	%rsi, %r13
>   +	movq	%rcx, -56(%rbp)                 # 8-byte Spill
>   +	movq	%rdx, -48(%rbp)                 # 8-byte Spill
>   +	movq	%rsi, %r12
>    	movq	%rdi, %r15
>    	incl	%r14d
>    	xorl	%ebx, %ebx
>   -                                        # implicit-def: $r12
>   -	movq	%rsi, -48(%rbp)                 # 8-byte Spill
>   +                                        # implicit-def: $rax
>   +                                        # kill: killed $rax
>    	jmp	.LBB0_3
>    	.p2align	4, 0x90
>    .LBB0_1:                                #   in Loop: Header=BB0_3 Depth=1
>   @@ -41,41 +41,37 @@
>    	xorl	%edi, %edi
>    	callq	_Znwm@PLT
>    	shlq	$4, %r15
>   -	addq	-64(%rbp), %r15                 # 8-byte Folded Reload
>   -	movq	%r12, %rcx
>   +	addq	-56(%rbp), %r15                 # 8-byte Folded Reload
>   +	movq	-64(%rbp), %rdx                 # 8-byte Reload
>   +	movq	%rdx, %rcx
>    	shrq	$32, %rcx
>    	movb	%cl, 12(%rax)
>   -	movl	%r12d, 8(%rax)
>   +	movl	%edx, 8(%rax)
>    	movq	%r15, %rbx
>    	movq	%r13, %r15
>   -	movq	-48(%rbp), %r13                 # 8-byte Reload
>    	decl	%r14d
>   -	je	.LBB0_8
>   +	je	.LBB0_7
>    .LBB0_3:                                # =>This Inner Loop Header: Depth=1
>    	callq	_ZN5clang4Expr27IgnoreUnlessSpelledInSourceEv@PLT
>   -	cmpb	$-5, (%r13)
>   -	jae	.LBB0_5
>   +	cmpb	$-5, (%r12)
>   +	jb	.LBB0_5
>    # %bb.4:                                #   in Loop: Header=BB0_3 Depth=1
>   -	movl	%r12d, %r12d
>   -	cmpq	%r15, %rbx
>   -	jbe	.LBB0_1
>   -	jmp	.LBB0_7
>   -	.p2align	4, 0x90
>   -.LBB0_5:                                #   in Loop: Header=BB0_3 Depth=1
>    	movq	0, %rax
>    	movq	8, %rax
>   -	movq	-56(%rbp), %r12                 # 8-byte Reload
>   +	movq	-48(%rbp), %rax                 # 8-byte Reload
>   +	movq	%rax, -64(%rbp)                 # 8-byte Spill
>   +.LBB0_5:                                #   in Loop: Header=BB0_3 Depth=1
>    	cmpq	%r15, %rbx
>    	jbe	.LBB0_1
>   -.LBB0_7:                                #   in Loop: Header=BB0_3 Depth=1
>   +# %bb.6:                                #   in Loop: Header=BB0_3 Depth=1
>    	xorl	%eax, %eax
>    	xorl	%ebx, %ebx
>    	decl	%r14d
>    	jne	.LBB0_3
>   -.LBB0_8:
>   +.LBB0_7:
>    	cmpb	$0, 12(%rax)
>   -	jne	.LBB0_10
>   -# %bb.9:
>   +	jne	.LBB0_9
>   +# %bb.8:
>    	addq	$24, %rsp
>    	popq	%rbx
>    	popq	%r12
>   @@ -85,7 +81,7 @@
>    	popq	%rbp
>    	.cfi_def_cfa %rsp, 8
>    	retq
>   -.LBB0_10:
>   +.LBB0_9:
>    	.cfi_def_cfa %rbp, 16
>    	movq	%r15, %rdi
>    	callq	__ubsan_handle_load_invalid_value_abort@PLT
>
> No diff between b7836d856206 <https://reviews.llvm.org/rGb7836d856206ec39509d42529f958c920368166b> and 645f6dcd69a5 <https://reviews.llvm.org/rG645f6dcd69a5315dbe2a6b49fdd8d356512544e8> (HEAD).

I see the diff disappear if I make isCopyInstr skip isCopyInstrImpl. I'm not seeing anything clearly wrong with the codegen decisions here. The apparent removal of the block body in %bb.4 and the implicit_def; kill in the entry block look suspicious, but I'm not seeing how they're wrong.

Going through isCopyInstrImpl changes one spill weight because of an identity mov. That triggers a cascade of different spilling decisions, and you end up with different code.

If I step back and look at the original IR, it's branching on undef. Coming from the entry block %bb, on the first branch to %bb7, it's branching on undef here:

  bb:
     br label %bb7
  
  bb7:
   ...
    %i9 = phi i40 [ undef, %bb ], [ %i17, %bb5 ]
    ...
    %i13 = and i40 %i9, 4294967295
    br i1 %i12, label %bb16, label %bb14

Maybe this is just an artifact of reduction? If you try opt-bisect-limit, does it point at some other pass?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D150388/new/

https://reviews.llvm.org/D150388



More information about the llvm-commits mailing list