[PATCH] D118538: [SLP] Schedule only sub-graph of vectorizable instructions
Arthur Eubanks via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 1 15:25:47 PST 2022
aeubanks added a comment.
Based on some debug logs, it seems that the `llvm.stacksave()`/`llvm.stackrestore()` calls are what is moving around, not the allocas.
Better reduced repro:
$ cat /tmp/a.ll
target datalayout = "e-m:x-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32-a:0:32-S32"
target triple = "i386-pc-windows-msvc19.16.0"
declare i8* @llvm.stacksave()
declare void @llvm.stackrestore(i8*)
declare i8* @wibble(i8*)
declare void @quux(i32* inalloca(i32))
define void @ham() #1 {
%tmp2 = alloca i8
%tmp3 = alloca i8
%tmp4 = alloca i8
%tmp5 = alloca i8
%tmp12 = alloca [12 x i8*]
%tmp15 = call i8* @wibble(i8* %tmp2)
%tmp16 = call i8* @wibble(i8* %tmp3)
%tmp17 = call i8* @wibble(i8* %tmp4)
%tmp23 = call i8* @llvm.stacksave()
%tmp24 = alloca inalloca i32
call void @quux(i32* inalloca(i32) %tmp24)
call void @llvm.stackrestore(i8* %tmp23)
%tmp32 = getelementptr inbounds [12 x i8*], [12 x i8*]* %tmp12, i32 0, i32 0
store i8* %tmp4, i8** %tmp32
%tmp33 = getelementptr inbounds [12 x i8*], [12 x i8*]* %tmp12, i32 0, i32 1
store i8* %tmp4, i8** %tmp33
%tmp34 = getelementptr inbounds [12 x i8*], [12 x i8*]* %tmp12, i32 0, i32 2
store i8* %tmp4, i8** %tmp34
%tmp35 = getelementptr inbounds [12 x i8*], [12 x i8*]* %tmp12, i32 0, i32 3
store i8* %tmp4, i8** %tmp35
%tmp36 = getelementptr inbounds [12 x i8*], [12 x i8*]* %tmp12, i32 0, i32 4
store i8* %tmp4, i8** %tmp36
%tmp37 = getelementptr inbounds [12 x i8*], [12 x i8*]* %tmp12, i32 0, i32 5
store i8* %tmp5, i8** %tmp37
%tmp38 = getelementptr inbounds [12 x i8*], [12 x i8*]* %tmp12, i32 0, i32 6
store i8* %tmp5, i8** %tmp38
%tmp39 = getelementptr inbounds [12 x i8*], [12 x i8*]* %tmp12, i32 0, i32 7
store i8* %tmp5, i8** %tmp39
ret void
}
attributes #0 = { nofree nosync nounwind willreturn }
attributes #1 = { "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+sse3,+x87" }
$ bin/opt -passes=slp-vectorizer /tmp/a.ll -S
target datalayout = "e-m:x-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32-a:0:32-S32"
target triple = "i386-pc-windows-msvc19.16.0"
; Function Attrs: nofree nosync nounwind willreturn
declare i8* @llvm.stacksave() #0
; Function Attrs: nofree nosync nounwind willreturn
declare void @llvm.stackrestore(i8*) #0
declare i8* @wibble(i8*)
declare void @quux(i32* inalloca(i32))
define void @ham() #1 {
%tmp2 = alloca i8, align 1
%tmp3 = alloca i8, align 1
%tmp12 = alloca [12 x i8*], align 4
%tmp15 = call i8* @wibble(i8* %tmp2)
%tmp16 = call i8* @wibble(i8* %tmp3)
%tmp24 = alloca inalloca i32, align 4
%tmp32 = getelementptr inbounds [12 x i8*], [12 x i8*]* %tmp12, i32 0, i32 0
%tmp33 = getelementptr inbounds [12 x i8*], [12 x i8*]* %tmp12, i32 0, i32 1
%tmp34 = getelementptr inbounds [12 x i8*], [12 x i8*]* %tmp12, i32 0, i32 2
%tmp35 = getelementptr inbounds [12 x i8*], [12 x i8*]* %tmp12, i32 0, i32 3
%1 = bitcast i8** %tmp32 to <4 x i8*>*
%tmp36 = getelementptr inbounds [12 x i8*], [12 x i8*]* %tmp12, i32 0, i32 4
%tmp37 = getelementptr inbounds [12 x i8*], [12 x i8*]* %tmp12, i32 0, i32 5
%tmp38 = getelementptr inbounds [12 x i8*], [12 x i8*]* %tmp12, i32 0, i32 6
%tmp39 = getelementptr inbounds [12 x i8*], [12 x i8*]* %tmp12, i32 0, i32 7
%tmp4 = alloca i8, align 1
%tmp5 = alloca i8, align 1
%tmp17 = call i8* @wibble(i8* %tmp4)
%tmp23 = call i8* @llvm.stacksave()
call void @quux(i32* inalloca(i32) %tmp24)
call void @llvm.stackrestore(i8* %tmp23)
%2 = insertelement <4 x i8*> poison, i8* %tmp4, i32 0
%shuffle = shufflevector <4 x i8*> %2, <4 x i8*> poison, <4 x i32> zeroinitializer
store <4 x i8*> %shuffle, <4 x i8*>* %1, align 4
%3 = insertelement <4 x i8*> %2, i8* %tmp5, i32 1
%shuffle1 = shufflevector <4 x i8*> %3, <4 x i8*> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
%4 = bitcast i8** %tmp36 to <4 x i8*>*
store <4 x i8*> %shuffle1, <4 x i8*>* %4, align 4
ret void
}
attributes #0 = { nofree nosync nounwind willreturn }
attributes #1 = { "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+sse3,+x87" }
In the output, `%tmp24` (the `inalloca` alloca) is no longer between the `stacksave`/`stackrestore` calls; it now appears before the `llvm.stacksave()` call.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D118538/new/
https://reviews.llvm.org/D118538
More information about the llvm-commits mailing list