[PATCH] D150388: [CodeGen]Allow targets to use target specific COPY instructions for live range splitting
Matt Arsenault via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 19 18:31:52 PDT 2023
arsenm added a comment.
In D150388#4515524 <https://reviews.llvm.org/D150388#4515524>, @vitalybuka wrote:
> My repro:
>
> ; ModuleID = '<bc file>'
> target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
> target triple = "x86_64-grtev4-linux-gnu"
>
> %"struct.devtools::inliner::CallArg2.2307.4010.6850.7702.9690.1453.4566.8245.24575.24695.24965.25235.25445.26645.26877.26937.27207.27387.27673.27761.27849.27893.27915.27959.27981.28003.28016.28068.28120.28133.28185.28237.28250.50.102.115" = type { %"class.std::__u::optional.2306.4009.6849.7701.9689.1452.4565.8244.24574.24694.24964.25234.25444.26644.26876.26936.27206.27386.27672.27760.27848.27892.27914.27958.27980.28002.28015.28067.28119.28132.28184.28236.28249.49.101.114" }
> %"class.std::__u::optional.2306.4009.6849.7701.9689.1452.4565.8244.24574.24694.24964.25234.25444.26644.26876.26936.27206.27386.27672.27760.27848.27892.27914.27958.27980.28002.28015.28067.28119.28132.28184.28236.28249.49.101.114" = type { %"struct.std::__u::__optional_move_assign_base.base.2305.4008.6848.7700.9688.1451.4564.8243.24573.24693.24963.25233.25443.26643.26875.26935.27205.27385.27671.27759.27847.27891.27913.27957.27979.28001.28014.28066.28118.28131.28183.28235.28248.48.100.113", [3 x i8] }
> %"struct.std::__u::__optional_move_assign_base.base.2305.4008.6848.7700.9688.1451.4564.8243.24573.24693.24963.25233.25443.26643.26875.26935.27205.27385.27671.27759.27847.27891.27913.27957.27979.28001.28014.28066.28118.28131.28183.28235.28248.48.100.113" = type { %"struct.std::__u::__optional_copy_assign_base.base.2304.4007.6847.7699.9687.1450.4563.8242.24572.24692.24962.25232.25442.26642.26874.26934.27204.27384.27670.27758.27846.27890.27912.27956.27978.28000.28013.28065.28117.28130.28182.28234.28247.47.99.112" }
> %"struct.std::__u::__optional_copy_assign_base.base.2304.4007.6847.7699.9687.1450.4563.8242.24572.24692.24962.25232.25442.26642.26874.26934.27204.27384.27670.27758.27846.27890.27912.27956.27978.28000.28013.28065.28117.28130.28182.28234.28247.47.99.112" = type { %"struct.std::__u::__optional_move_base.base.2303.4006.6846.7698.9686.1449.4562.8241.24571.24691.24961.25231.25441.26641.26873.26933.27203.27383.27669.27757.27845.27889.27911.27955.27977.27999.28012.28064.28116.28129.28181.28233.28246.46.98.111" }
> %"struct.std::__u::__optional_move_base.base.2303.4006.6846.7698.9686.1449.4562.8241.24571.24691.24961.25231.25441.26641.26873.26933.27203.27383.27669.27757.27845.27889.27911.27955.27977.27999.28012.28064.28116.28129.28181.28233.28246.46.98.111" = type { %"struct.std::__u::__optional_copy_base.base.2302.4005.6845.7697.9685.1448.4561.8240.24570.24690.24960.25230.25440.26640.26872.26932.27202.27382.27668.27756.27844.27888.27910.27954.27976.27998.28011.28063.28115.28128.28180.28232.28245.45.97.110" }
> %"struct.std::__u::__optional_copy_base.base.2302.4005.6845.7697.9685.1448.4561.8240.24570.24690.24960.25230.25440.26640.26872.26932.27202.27382.27668.27756.27844.27888.27910.27954.27976.27998.28011.28063.28115.28128.28180.28232.28245.45.97.110" = type { %"struct.std::__u::__optional_storage_base.base.2301.4004.6844.7696.9684.1447.4560.8239.24569.24689.24959.25229.25439.26639.26871.26931.27201.27381.27667.27755.27843.27887.27909.27953.27975.27997.28010.28062.28114.28127.28179.28231.28244.44.96.109" }
> %"struct.std::__u::__optional_storage_base.base.2301.4004.6844.7696.9684.1447.4560.8239.24569.24689.24959.25229.25439.26639.26871.26931.27201.27381.27667.27755.27843.27887.27909.27953.27975.27997.28010.28062.28114.28127.28179.28231.28244.44.96.109" = type { %"struct.std::__u::__optional_destruct_base.base.2300.4003.6843.7695.9683.1446.4559.8238.24568.24688.24958.25228.25438.26638.26870.26930.27200.27380.27666.27754.27842.27886.27908.27952.27974.27996.28009.28061.28113.28126.28178.28230.28243.43.95.108" }
> %"struct.std::__u::__optional_destruct_base.base.2300.4003.6843.7695.9683.1446.4559.8238.24568.24688.24958.25228.25438.26638.26870.26930.27200.27380.27666.27754.27842.27886.27908.27952.27974.27996.28009.28061.28113.28126.28178.28230.28243.43.95.108" = type { %union.anon.40.2299.4002.6842.7694.9682.1445.4558.8237.24567.24687.24957.25227.25437.26637.26869.26929.27199.27379.27665.27753.27841.27885.27907.27951.27973.27995.28008.28060.28112.28125.28177.28229.28242.42.94.107, i8 }
> %union.anon.40.2299.4002.6842.7694.9682.1445.4558.8237.24567.24687.24957.25227.25437.26637.26869.26929.27199.27379.27665.27753.27841.27885.27907.27951.27973.27995.28008.28060.28112.28125.28177.28229.28242.42.94.107 = type { %"class.clang::CharSourceRange.2289.3992.6832.7684.9672.1435.4548.8227.24566.24686.24956.25226.25436.26636.26868.26928.27198.27378.27664.27752.27840.27884.27906.27950.27972.27994.28007.28059.28111.28124.28176.28228.28241.41.93.106" }
> %"class.clang::CharSourceRange.2289.3992.6832.7684.9672.1435.4548.8227.24566.24686.24956.25226.25436.26636.26868.26928.27198.27378.27664.27752.27840.27884.27906.27950.27972.27994.28007.28059.28111.28124.28176.28228.28241.41.93.106" = type <{ %"class.clang::SourceRange.2288.3991.6831.7683.9671.1434.4547.8226.24565.24685.24955.25225.25435.26635.26867.26927.27197.27377.27663.27751.27839.27883.27905.27949.27971.27993.28006.28058.28110.28123.28175.28227.28240.40.92.105", i8, [3 x i8] }>
> %"class.clang::SourceRange.2288.3991.6831.7683.9671.1434.4547.8226.24565.24685.24955.25225.25435.26635.26867.26927.27197.27377.27663.27751.27839.27883.27905.27949.27971.27993.28006.28058.28110.28123.28175.28227.28240.40.92.105" = type { %"class.clang::SourceLocation.2287.3990.6830.7682.9670.1433.4546.8225.24555.24675.24945.25215.25425.26625.26857.26917.27187.27367.27653.27741.27829.27873.27895.27939.27961.27983.28005.28057.28109.28122.28174.28226.28239.39.91.104", %"class.clang::SourceLocation.2287.3990.6830.7682.9670.1433.4546.8225.24555.24675.24945.25215.25425.26625.26857.26917.27187.27367.27653.27741.27829.27873.27895.27939.27961.27983.28005.28057.28109.28122.28174.28226.28239.39.91.104" }
> %"class.clang::SourceLocation.2287.3990.6830.7682.9670.1433.4546.8225.24555.24675.24945.25215.25425.26625.26857.26917.27187.27367.27653.27741.27829.27873.27895.27939.27961.27983.28005.28057.28109.28122.28174.28226.28239.39.91.104" = type { i32 }
> %"struct.std::__u::__optional_destruct_base.2555.4258.7098.7950.9938.1701.4814.8493.24576.24696.24966.25236.25446.26646.26885.26945.27215.27395.27674.27762.27850.27894.27916.27960.27982.28004.28017.28069.28121.28134.28186.28238.28251.51.103.116" = type { %union.anon.40.2299.4002.6842.7694.9682.1445.4558.8237.24567.24687.24957.25227.25437.26637.26869.26929.27199.27379.27665.27753.27841.27885.27907.27951.27973.27995.28008.28060.28112.28125.28177.28229.28242.42.94.107, i8, [3 x i8] }
>
> ; Function Attrs: noinline
> define void @_ZN8devtools7inliner14ParseCallArgs3ERKN5clang8CallExprERKNS1_12FunctionDeclERNS1_10ASTContextE(ptr %0, ptr %1, i40 %2, ptr %3, i32 %4) #0 {
> br label %9
>
> 6: ; preds = %21, %18
> %.sroa.0.0 = phi ptr [ %24, %21 ], [ null, %18 ]
> %.sroa.5.0 = phi ptr [ %25, %21 ], [ null, %18 ]
> %7 = add i32 %10, 1
> %8 = icmp eq i32 %10, %4
> br i1 %8, label %27, label %9
>
> 9: ; preds = %6, %5
> %.sroa.5.1 = phi ptr [ null, %5 ], [ %.sroa.5.0, %6 ]
> %10 = phi i32 [ 0, %5 ], [ %7, %6 ]
> %11 = phi i40 [ undef, %5 ], [ %19, %6 ]
> %12 = call ptr @_ZN5clang4Expr27IgnoreUnlessSpelledInSourceEv()
> %13 = load i8, ptr %1, align 8
> %14 = icmp ult i8 %13, -5
> %15 = and i40 %11, 4294967295
> br i1 %14, label %18, label %16
>
> 16: ; preds = %9
> %17 = load volatile { i64, i64 }, ptr null, align 4294967296
> br label %18
>
> 18: ; preds = %16, %9
> %19 = phi i40 [ %15, %9 ], [ %2, %16 ]
> %20 = icmp ugt ptr %.sroa.5.1, %0
> br i1 %20, label %6, label %21
>
> 21: ; preds = %18
> %22 = icmp eq ptr %.sroa.5.1, null
> %23 = zext i1 %22 to i64
> %24 = call ptr @_Znwm(i64 0)
> %25 = getelementptr %"struct.devtools::inliner::CallArg2.2307.4010.6850.7702.9690.1453.4566.8245.24575.24695.24965.25235.25445.26645.26877.26937.27207.27387.27673.27761.27849.27893.27915.27959.27981.28003.28016.28068.28120.28133.28185.28237.28250.50.102.115", ptr %3, i64 %23
> %26 = getelementptr i8, ptr %24, i64 8
> store i40 %19, ptr %26, align 4
> br label %6
>
> 27: ; preds = %6
> %28 = getelementptr %"struct.std::__u::__optional_destruct_base.2555.4258.7098.7950.9938.1701.4814.8493.24576.24696.24966.25236.25446.26646.26885.26945.27215.27395.27674.27762.27850.27894.27916.27960.27982.28004.28017.28069.28121.28134.28186.28238.28251.51.103.116", ptr %.sroa.0.0, i64 0, i32 1
> %29 = load i8, ptr %28, align 4
> %30 = icmp eq i8 %29, 0
> br i1 %30, label %32, label %31
>
> 31: ; preds = %27
> call void @__ubsan_handle_load_invalid_value_abort(ptr %0)
> unreachable
>
> 32: ; preds = %27
> ret void
>
> ; uselistorder directives
> uselistorder i32 %10, { 1, 0 }
> }
>
> define void @_ZN8devtools7inliner14ParseCallArgs2ERKN5clang8CallExprERKNS1_12FunctionDeclERNS1_10ASTContextE(ptr %0, ptr %1) {
> call void @_ZN8devtools7inliner14ParseCallArgs3ERKN5clang8CallExprERKNS1_12FunctionDeclERNS1_10ASTContextE(ptr %1, ptr %0, i40 0, ptr null, i32 0)
> ret void
> }
>
> declare ptr @_ZN5clang4Expr27IgnoreUnlessSpelledInSourceEv()
>
> declare void @__ubsan_handle_load_invalid_value_abort(ptr)
>
> declare ptr @_Znwm(i64)
>
> attributes #0 = { noinline "frame-pointer"="all" }
>
> Before this patch __ubsan_handle_load_invalid_value_abort was not reached, now it is.
>
> llc ./llvm-reduce-42f916.ll -O3 -o ./llvm-reduce-42f916.ll.<revision>.s
> diff -u --color ./llvm-reduce-42f916.ll.eb98abab2c83.s ./llvm-reduce-42f916.ll.b7836d856206.s :
>
> --- ./llvm-reduce-42f916.ll.eb98abab2c83.s 2023-07-19 10:38:05.571863413 -0700
> +++ ./llvm-reduce-42f916.ll.b7836d856206.s 2023-07-19 10:39:47.587632219 -0700
> @@ -23,14 +23,14 @@
> .cfi_offset %r14, -32
> .cfi_offset %r15, -24
> movl %r8d, %r14d
> - movq %rcx, -64(%rbp) # 8-byte Spill
> - movq %rdx, -56(%rbp) # 8-byte Spill
> - movq %rsi, %r13
> + movq %rcx, -56(%rbp) # 8-byte Spill
> + movq %rdx, -48(%rbp) # 8-byte Spill
> + movq %rsi, %r12
> movq %rdi, %r15
> incl %r14d
> xorl %ebx, %ebx
> - # implicit-def: $r12
> - movq %rsi, -48(%rbp) # 8-byte Spill
> + # implicit-def: $rax
> + # kill: killed $rax
> jmp .LBB0_3
> .p2align 4, 0x90
> .LBB0_1: # in Loop: Header=BB0_3 Depth=1
> @@ -41,41 +41,37 @@
> xorl %edi, %edi
> callq _Znwm@PLT
> shlq $4, %r15
> - addq -64(%rbp), %r15 # 8-byte Folded Reload
> - movq %r12, %rcx
> + addq -56(%rbp), %r15 # 8-byte Folded Reload
> + movq -64(%rbp), %rdx # 8-byte Reload
> + movq %rdx, %rcx
> shrq $32, %rcx
> movb %cl, 12(%rax)
> - movl %r12d, 8(%rax)
> + movl %edx, 8(%rax)
> movq %r15, %rbx
> movq %r13, %r15
> - movq -48(%rbp), %r13 # 8-byte Reload
> decl %r14d
> - je .LBB0_8
> + je .LBB0_7
> .LBB0_3: # =>This Inner Loop Header: Depth=1
> callq _ZN5clang4Expr27IgnoreUnlessSpelledInSourceEv@PLT
> - cmpb $-5, (%r13)
> - jae .LBB0_5
> + cmpb $-5, (%r12)
> + jb .LBB0_5
> # %bb.4: # in Loop: Header=BB0_3 Depth=1
> - movl %r12d, %r12d
> - cmpq %r15, %rbx
> - jbe .LBB0_1
> - jmp .LBB0_7
> - .p2align 4, 0x90
> -.LBB0_5: # in Loop: Header=BB0_3 Depth=1
> movq 0, %rax
> movq 8, %rax
> - movq -56(%rbp), %r12 # 8-byte Reload
> + movq -48(%rbp), %rax # 8-byte Reload
> + movq %rax, -64(%rbp) # 8-byte Spill
> +.LBB0_5: # in Loop: Header=BB0_3 Depth=1
> cmpq %r15, %rbx
> jbe .LBB0_1
> -.LBB0_7: # in Loop: Header=BB0_3 Depth=1
> +# %bb.6: # in Loop: Header=BB0_3 Depth=1
> xorl %eax, %eax
> xorl %ebx, %ebx
> decl %r14d
> jne .LBB0_3
> -.LBB0_8:
> +.LBB0_7:
> cmpb $0, 12(%rax)
> - jne .LBB0_10
> -# %bb.9:
> + jne .LBB0_9
> +# %bb.8:
> addq $24, %rsp
> popq %rbx
> popq %r12
> @@ -85,7 +81,7 @@
> popq %rbp
> .cfi_def_cfa %rsp, 8
> retq
> -.LBB0_10:
> +.LBB0_9:
> .cfi_def_cfa %rbp, 16
> movq %r15, %rdi
> callq __ubsan_handle_load_invalid_value_abort@PLT
>
> No diff b7836d856206 <https://reviews.llvm.org/rGb7836d856206ec39509d42529f958c920368166b> vs 645f6dcd69a5 <https://reviews.llvm.org/rG645f6dcd69a5315dbe2a6b49fdd8d356512544e8>(HEAD)
I see the diff disappear if I make isCopyInstr skip isCopyInstrImpl. I'm not seeing anything clearly wrong with the codegen decisions here. The apparent block body removal in %bb.4 and the implicit_def; kill in the entry look suspicious, but I'm not seeing how they're wrong.
Going through isCopyInstrImpl changes one spill weight because of an identity mov. That just triggers a bunch of different spilling decisions downstream, and you end up with different code.
If I step back and look at the original IR, it's branching on undef. From the entry in %bb, on the first branch to %bb7, it's branching on undef here:
bb:
br label %bb7
bb7:
...
%i9 = phi i40 [ undef, %bb ], [ %i17, %bb5 ]
...
%i13 = and i40 %i9, 4294967295
br i1 %i12, label %bb16, label %bb14
Maybe this is just an artifact of reduction? If you try opt-bisect-limit, does it point at some other pass?
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D150388/new/
https://reviews.llvm.org/D150388
More information about the llvm-commits
mailing list